mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Fix for drmemory failure on I411ToARGB
Before: I420ToARGB_Opt (594 ms), I422ToARGB_Opt (483 ms), I411ToARGB_Opt (748 ms) ***, I444ToARGB_Opt (452 ms), I400ToARGB_Opt (218 ms). After: I420ToARGB_Opt (591 ms), I422ToARGB_Opt (454 ms), I411ToARGB_Opt (502 ms) ***, I444ToARGB_Opt (441 ms), I400ToARGB_Opt (216 ms). TBR=harryjin@google.com BUG=libyuv:525 Review URL: https://codereview.chromium.org/1459513002
This commit is contained in:
parent
ec4b258d4e
commit
5eefbe2330
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1539
|
Version: 1540
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1539
|
#define LIBYUV_VERSION 1540
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -2440,9 +2440,14 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Read 2 UV from 411, upsample to 8 UV.
|
// Read 2 UV from 411, upsample to 8 UV.
|
||||||
#define READYUV411 __asm { \
|
// drmemory fails with memory fault if pinsrw used. libyuv bug: 525
|
||||||
__asm pinsrw xmm0, [esi], 0 /* U */ \
|
// __asm pinsrw xmm0, [esi], 0 /* U */
|
||||||
__asm pinsrw xmm1, [esi + edi], 0 /* V */ \
|
// __asm pinsrw xmm1, [esi + edi], 0 /* V */
|
||||||
|
#define READYUV411_EBX __asm { \
|
||||||
|
__asm movzx ebx, word ptr [esi] /* U */ \
|
||||||
|
__asm movd xmm0, ebx \
|
||||||
|
__asm movzx ebx, word ptr [esi + edi] /* V */ \
|
||||||
|
__asm movd xmm1, ebx \
|
||||||
__asm lea esi, [esi + 2] \
|
__asm lea esi, [esi + 2] \
|
||||||
__asm punpcklbw xmm0, xmm1 /* UV */ \
|
__asm punpcklbw xmm0, xmm1 /* UV */ \
|
||||||
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
||||||
@ -2816,23 +2821,25 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
push ebx
|
push ebx
|
||||||
mov eax, [esp + 12 + 4] // Y
|
push ebp
|
||||||
mov esi, [esp + 12 + 8] // U
|
mov eax, [esp + 16 + 4] // Y
|
||||||
mov edi, [esp + 12 + 12] // V
|
mov esi, [esp + 16 + 8] // U
|
||||||
mov edx, [esp + 12 + 16] // abgr
|
mov edi, [esp + 16 + 12] // V
|
||||||
mov ebx, [esp + 12 + 20] // yuvconstants
|
mov edx, [esp + 16 + 16] // abgr
|
||||||
mov ecx, [esp + 12 + 24] // width
|
mov ebp, [esp + 16 + 20] // yuvconstants
|
||||||
|
mov ecx, [esp + 16 + 24] // width
|
||||||
sub edi, esi
|
sub edi, esi
|
||||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||||
|
|
||||||
convertloop:
|
convertloop:
|
||||||
READYUV411
|
READYUV411_EBX
|
||||||
YUVTORGB(ebx)
|
YUVTORGB(ebp)
|
||||||
STOREARGB
|
STOREARGB
|
||||||
|
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jg convertloop
|
jg convertloop
|
||||||
|
|
||||||
|
pop ebp
|
||||||
pop ebx
|
pop ebx
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
|
|||||||
@ -415,8 +415,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kSizeUV = \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_64(src_u, kSizeUV + OFF); \
|
align_buffer_64(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_64(src_v, kSizeUV + OFF); \
|
align_buffer_64(src_v, kSizeUV + OFF); \
|
||||||
@ -433,15 +433,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
|||||||
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_c + OFF, kStrideB, \
|
dst_argb_c + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_opt + OFF, kStrideB, \
|
dst_argb_opt + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
@ -524,8 +524,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kSizeUV = \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_64(src_u, kSizeUV + OFF); \
|
align_buffer_64(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_64(src_v, kSizeUV + OFF); \
|
align_buffer_64(src_v, kSizeUV + OFF); \
|
||||||
@ -544,16 +544,16 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
|||||||
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
src_a + OFF, kWidth, \
|
src_a + OFF, kWidth, \
|
||||||
dst_argb_c + OFF, kStrideB, \
|
dst_argb_c + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight, ATTEN); \
|
kWidth, NEG kHeight, ATTEN); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
src_a + OFF, kWidth, \
|
src_a + OFF, kWidth, \
|
||||||
dst_argb_opt + OFF, kStrideB, \
|
dst_argb_opt + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight, ATTEN); \
|
kWidth, NEG kHeight, ATTEN); \
|
||||||
@ -598,32 +598,31 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStrideB = kWidth * BPP_B; \
|
const int kStrideB = kWidth * BPP_B; \
|
||||||
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_64(src_uv, \
|
align_buffer_64(src_uv, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
|
|
||||||
align_buffer_64(dst_argb_c, kStrideB * kHeight); \
|
align_buffer_64(dst_argb_c, kStrideB * kHeight); \
|
||||||
align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
|
align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kWidth; ++j) \
|
for (int j = 0; j < kWidth; ++j) \
|
||||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
|
for (int j = 0; j < kStrideUV * 2; ++j) { \
|
||||||
src_uv[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j + OFF] = \
|
src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
|
||||||
(fastrand() & 0xff); \
|
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 1, kStrideB * kHeight); \
|
memset(dst_argb_c, 1, kStrideB * kHeight); \
|
||||||
memset(dst_argb_opt, 101, kStrideB * kHeight); \
|
memset(dst_argb_opt, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
|
src_uv + OFF, kStrideUV * 2, \
|
||||||
dst_argb_c, kWidth * BPP_B, \
|
dst_argb_c, kWidth * BPP_B, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
|
src_uv + OFF, kStrideUV * 2, \
|
||||||
dst_argb_opt, kWidth * BPP_B, \
|
dst_argb_opt, kWidth * BPP_B, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
@ -677,48 +676,49 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
|
|||||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kStride = \
|
const int kStride = \
|
||||||
(SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
(kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
||||||
align_buffer_64(src_argb, kStride * kHeight + OFF); \
|
align_buffer_64(src_argb, kStride * kHeight + OFF); \
|
||||||
align_buffer_64(dst_y_c, kWidth * kHeight); \
|
align_buffer_64(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_64(dst_u_c, \
|
align_buffer_64(dst_u_c, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
kStrideUV * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_64(dst_v_c, \
|
align_buffer_64(dst_v_c, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
kStrideUV * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_64(dst_y_opt, kWidth * kHeight); \
|
align_buffer_64(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_64(dst_u_opt, \
|
align_buffer_64(dst_u_opt, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
kStrideUV * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_64(dst_v_opt, \
|
align_buffer_64(dst_v_opt, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
kStrideUV * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_c, 1, kWidth * kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_u_c, 2, \
|
memset(dst_u_c, 2, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_c, 3, \
|
memset(dst_v_c, 3, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_opt, 101, kWidth * kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_u_opt, 102, \
|
memset(dst_u_opt, 102, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_opt, 103, \
|
memset(dst_v_opt, 103, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kStride; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
||||||
dst_y_c, kWidth, \
|
dst_y_c, kWidth, \
|
||||||
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
dst_u_c, kStrideUV, \
|
||||||
dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
dst_v_c, kStrideUV, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
||||||
dst_y_opt, kWidth, \
|
dst_y_opt, kWidth, \
|
||||||
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
dst_u_opt, kStrideUV, \
|
||||||
dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
dst_v_opt, kStrideUV, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
@ -728,19 +728,17 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
for (int j = 0; j < kStrideUV; ++j) { \
|
||||||
EXPECT_NEAR(static_cast<int>(dst_u_c[i * \
|
EXPECT_NEAR(static_cast<int>(dst_u_c[i * kStrideUV + j]), \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), \
|
static_cast<int>(dst_u_opt[i * kStrideUV + j]), DIFF); \
|
||||||
static_cast<int>(dst_u_opt[i * \
|
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), DIFF); \
|
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
for (int j = 0; j < kStrideUV; ++j) { \
|
||||||
EXPECT_NEAR(static_cast<int>(dst_v_c[i * \
|
EXPECT_NEAR(static_cast<int>(dst_v_c[i * \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), \
|
kStrideUV + j]), \
|
||||||
static_cast<int>(dst_v_opt[i * \
|
static_cast<int>(dst_v_opt[i * \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), DIFF); \
|
kStrideUV + j]), DIFF); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
free_aligned_buffer_64(dst_y_c); \
|
free_aligned_buffer_64(dst_y_c); \
|
||||||
@ -796,35 +794,28 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
|
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
|
||||||
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
align_buffer_64(src_argb, kStride * kHeight + OFF); \
|
align_buffer_64(src_argb, kStride * kHeight + OFF); \
|
||||||
align_buffer_64(dst_y_c, kWidth * kHeight); \
|
align_buffer_64(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_64(dst_uv_c, \
|
align_buffer_64(dst_uv_c, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
|
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
|
||||||
align_buffer_64(dst_y_opt, kWidth * kHeight); \
|
align_buffer_64(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_64(dst_uv_opt, \
|
align_buffer_64(dst_uv_opt, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
|
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kStride; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
||||||
memset(dst_y_c, 1, kWidth * kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_uv_c, 2, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
|
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
|
||||||
memset(dst_y_opt, 101, kWidth * kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_uv_opt, 102, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
|
memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
||||||
dst_y_c, kWidth, \
|
dst_y_c, kWidth, dst_uv_c, kStrideUV * 2, \
|
||||||
dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
|
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
|
||||||
dst_y_opt, kWidth, \
|
dst_y_opt, kWidth, \
|
||||||
dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
|
dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \
|
||||||
kWidth, NEG kHeight); \
|
|
||||||
} \
|
} \
|
||||||
int max_diff = 0; \
|
int max_diff = 0; \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
@ -839,12 +830,10 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
|
|||||||
} \
|
} \
|
||||||
EXPECT_LE(max_diff, 4); \
|
EXPECT_LE(max_diff, 4); \
|
||||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
|
for (int j = 0; j < kStrideUV * 2; ++j) { \
|
||||||
int abs_diff = \
|
int abs_diff = \
|
||||||
abs(static_cast<int>(dst_uv_c[i * \
|
abs(static_cast<int>(dst_uv_c[i * kStrideUV * 2 + j]) - \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
|
static_cast<int>(dst_uv_opt[i * kStrideUV * 2 + j])); \
|
||||||
static_cast<int>(dst_uv_opt[i * \
|
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
|
|
||||||
if (abs_diff > max_diff) { \
|
if (abs_diff > max_diff) { \
|
||||||
max_diff = abs_diff; \
|
max_diff = abs_diff; \
|
||||||
} \
|
} \
|
||||||
@ -1366,8 +1355,9 @@ TEST_F(LibYUVConvertTest, CropNV12) {
|
|||||||
((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
|
((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
|
||||||
const int kDestWidth = benchmark_width_;
|
const int kDestWidth = benchmark_width_;
|
||||||
const int kDestHeight = benchmark_height_ - crop_y * 2;
|
const int kDestHeight = benchmark_height_ - crop_y * 2;
|
||||||
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
|
||||||
const int sample_size = kWidth * kHeight +
|
const int sample_size = kWidth * kHeight +
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) *
|
kStrideUV *
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
|
SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
|
||||||
align_buffer_64(src_y, sample_size);
|
align_buffer_64(src_y, sample_size);
|
||||||
uint8* src_uv = src_y + kWidth * kHeight;
|
uint8* src_uv = src_y + kWidth * kHeight;
|
||||||
@ -1392,7 +1382,7 @@ TEST_F(LibYUVConvertTest, CropNV12) {
|
|||||||
src_y[i] = (fastrand() & 0xff);
|
src_y[i] = (fastrand() & 0xff);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) *
|
for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) *
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X)) * 2; ++i) {
|
kStrideUV) * 2; ++i) {
|
||||||
src_uv[i] = (fastrand() & 0xff);
|
src_uv[i] = (fastrand() & 0xff);
|
||||||
}
|
}
|
||||||
memset(dst_y, 1, kDestWidth * kDestHeight);
|
memset(dst_y, 1, kDestWidth * kDestHeight);
|
||||||
@ -1416,8 +1406,8 @@ TEST_F(LibYUVConvertTest, CropNV12) {
|
|||||||
libyuv::kRotate0, libyuv::FOURCC_NV12);
|
libyuv::kRotate0, libyuv::FOURCC_NV12);
|
||||||
|
|
||||||
NV12ToI420(src_y + crop_y * kWidth, kWidth,
|
NV12ToI420(src_y + crop_y * kWidth, kWidth,
|
||||||
src_uv + (crop_y / 2) * SUBSAMPLE(kWidth, SUBSAMP_X) * 2,
|
src_uv + (crop_y / 2) * kStrideUV * 2,
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * 2,
|
kStrideUV * 2,
|
||||||
dst_y, kDestWidth,
|
dst_y, kDestWidth,
|
||||||
dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X),
|
dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X),
|
||||||
dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X),
|
dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X),
|
||||||
@ -1548,8 +1538,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kSizeUV = \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_64(src_u, kSizeUV + OFF); \
|
align_buffer_64(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_64(src_v, kSizeUV + OFF); \
|
align_buffer_64(src_v, kSizeUV + OFF); \
|
||||||
@ -1566,15 +1556,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
|
|||||||
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_c + OFF, kStrideB, \
|
dst_argb_c + OFF, kStrideB, \
|
||||||
NULL, kWidth, NEG kHeight); \
|
NULL, kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_opt + OFF, kStrideB, \
|
dst_argb_opt + OFF, kStrideB, \
|
||||||
NULL, kWidth, NEG kHeight); \
|
NULL, kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
@ -1698,8 +1688,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
|
|||||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStrideB = kWidth * BPP_B; \
|
const int kStrideB = kWidth * BPP_B; \
|
||||||
const int kSizeUV = \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
align_buffer_64(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_64(src_u, kSizeUV + OFF); \
|
align_buffer_64(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_64(src_v, kSizeUV + OFF); \
|
align_buffer_64(src_v, kSizeUV + OFF); \
|
||||||
@ -1714,8 +1704,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
|
|||||||
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_b + OFF, kStrideB, \
|
dst_argb_b + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
@ -1727,8 +1717,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
|
|||||||
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
||||||
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
||||||
FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \
|
FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \
|
||||||
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_u + OFF, kStrideUV, \
|
||||||
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
|
src_v + OFF, kStrideUV, \
|
||||||
dst_argb_c + OFF, kStrideC, \
|
dst_argb_c + OFF, kStrideC, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
/* Convert B to C */ \
|
/* Convert B to C */ \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user