From 26173eb73ec1f52fe4b405b760b1e9f2a2d2d04a Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 22 Nov 2017 15:11:11 -0800 Subject: [PATCH] H010ToAR30 for 10 bit bt.709 YUV to 30 bit RGB This version of the H010ToAR30 provides a 3 step conversion Convert16To8Row_AVX2 H420ToARGB_AVX2 ARGBToAR30_AVX2 Low level function added to convert 16 bit to 8 bit using multiply to adjust 10 bit or other bit depths and then save the upper 16 bits. Bug: libyuv:751 Test: LibYUVPlanarTest.Convert16To8Row_Opt unittest added Change-Id: I9cc576fda8afa1003cb961d03e0e656e0b478f03 Reviewed-on: https://chromium-review.googlesource.com/783554 Commit-Queue: Frank Barchard Reviewed-by: richard winterton --- README.chromium | 2 +- include/libyuv/convert_argb.h | 13 ++++ include/libyuv/row.h | 11 ++- include/libyuv/version.h | 2 +- include/libyuv/video_common.h | 2 + source/convert_argb.cc | 130 ++++++++++++++++++++++++++++++++ source/row_common.cc | 20 +++++ source/row_gcc.cc | 133 ++++++++++++++++++++++----------- unit_test/convert_test.cc | 63 ++++++++++++++++ unit_test/planar_test.cc | 45 ++++++++++- unit_test/video_common_test.cc | 2 + 11 files changed, 372 insertions(+), 51 deletions(-) diff --git a/README.chromium b/README.chromium index 2ba21e58b..bd99afd2e 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1679 +Version: 1680 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index f43a5060b..1c89d9456 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -321,6 +321,19 @@ int H422ToABGR(const uint8* src_y, int width, int height); +// Convert H010 to AR30. 
+LIBYUV_API +int H010ToAR30(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height); + // BGRA little endian (argb in memory) to ARGB. LIBYUV_API int BGRAToARGB(const uint8* src_frame, diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 30b6e4c6a..743f6b154 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -278,6 +278,7 @@ extern "C" { (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_ARGBTOAR30ROW_AVX2 +#define HAS_CONVERT16TO8ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 #define HAS_MULTIPLYROW_16_AVX2 #endif @@ -1540,6 +1541,12 @@ void MultiplyRow_16_AVX2(const uint16* src_y, int width); void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width); +void Convert16To8Row_AVX2(const uint16* src_y, + uint8* dst_y, + int scale, + int width); +void Convert16To8Row_C(const uint16* src_y, uint8* dst_y, int scale, int width); + void CopyRow_SSE2(const uint8* src, uint8* dst, int count); void CopyRow_AVX(const uint8* src, uint8* dst, int count); void CopyRow_ERMS(const uint8* src, uint8* dst, int count); @@ -2419,9 +2426,7 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, - uint8* dst_rgb, - int width); +void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 884f3c950..b2b65d135 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define 
INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1679 +#define LIBYUV_VERSION 1680 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h index b09450653..ec520ef09 100644 --- a/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h @@ -93,6 +93,7 @@ enum FourCC { FOURCC_J420 = FOURCC('J', '4', '2', '0'), FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc + FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. @@ -154,6 +155,7 @@ enum FourCCBpp { FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, FOURCC_BPP_H420 = 12, + FOURCC_BPP_H010 = 24, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 5007bdb97..feef641cd 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -428,6 +428,136 @@ int H422ToABGR(const uint8* src_y, width, height); } +// Convert 10 bit YUV to 10 bit RGB with matrix +static int H010ToAR30Matrix(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + int halfwidth = (width + 1) >> 1; + void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale, + int width) = Convert16To8Row_C; + void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf, + const uint8* v_buf, uint8* rgb_buf, + const struct YuvConstants* yuvconstants, int width) = + I422ToARGBRow_C; + void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) = + ARGBToAR30Row_C; + + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height 
== 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; + dst_stride_ar30 = -dst_stride_ar30; + } + +#if defined(HAS_CONVERT16TO8ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert16To8Row = Convert16To8Row_C; // TODO(fbarchard): Any AVX2 + if (IS_ALIGNED(width, 64)) { + Convert16To8Row = Convert16To8Row_AVX2; + } + } +#endif + +#if defined(HAS_ARGBTOAR30ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR30Row = ARGBToAR30Row_AVX2; + } + } +#endif + +#if defined(HAS_I422TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I422TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToARGBRow = I422ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + I422ToARGBRow = I422ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I422TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToARGBRow = I422ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I422TOARGBROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + I422ToARGBRow = I422ToARGBRow_Any_MSA; + if (IS_ALIGNED(width, 8)) { + I422ToARGBRow = I422ToARGBRow_MSA; + } + } +#endif + + align_buffer_64(row_y, width); + align_buffer_64(row_u, halfwidth); + align_buffer_64(row_v, halfwidth); + align_buffer_64(row_argb, width * 4); + + for (y = 0; y < height; ++y) { + Convert16To8Row(src_y, row_y, scale, width); + Convert16To8Row(src_u, row_u, scale, halfwidth); + Convert16To8Row(src_v, row_v, scale, halfwidth); + + I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width); + + ARGBToAR30Row(row_argb, dst_ar30, width); + + dst_ar30 += dst_stride_ar30; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += 
src_stride_v; + } + } + free_aligned_buffer_64(row_y); + free_aligned_buffer_64(row_u); + free_aligned_buffer_64(row_v); + free_aligned_buffer_64(row_argb); + return 0; +} + +// Convert H010 to AR30. +LIBYUV_API +int H010ToAR30(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_ar30, + int dst_stride_ar30, + int width, + int height) { + return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_ar30, dst_stride_ar30, + &kYuvH709Constants, 16384, width, height); +} + // Convert I444 to ARGB with matrix static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y, diff --git a/source/row_common.cc b/source/row_common.cc index 5dfd57aed..3263142b7 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1811,6 +1811,11 @@ void MergeRGBRow_C(const uint8* src_r, } } +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits void MergeUVRow_16_C(const uint16* src_u, const uint16* src_v, uint16* dst_uv, @@ -1840,6 +1845,21 @@ void MultiplyRow_16_C(const uint16* src_y, } } +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +void Convert16To8Row_C(const uint16* src_y, + uint8* dst_y, + int scale, + int width) { + int x; + for (x = 0; x < width; ++x) { + dst_y[x] = (src_y[x] * scale) >> 16; + } +} + void CopyRow_C(const uint8* src, uint8* dst, int count) { memcpy(dst, src, count); } diff --git a/source/row_gcc.cc b/source/row_gcc.cc index f348b7edc..bfebbb3e3 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -702,52 +702,51 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) { #ifdef HAS_ARGBTOAR30ROW_AVX2 void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" 
// 0x000000ff mask - "vpsrld $0x18,%%ymm4,%%ymm4 \n" - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0xc0000000 mask - "vpslld $30,%%ymm5,%%ymm5 \n" + asm volatile( + "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0x000000ff mask + "vpsrld $0x18,%%ymm4,%%ymm4 \n" + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0xc0000000 mask + "vpslld $30,%%ymm5,%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - // alpha - "vpand %%ymm5,%%ymm0,%%ymm3 \n" - // red - "vpsrld $0x10,%%ymm0,%%ymm1 \n" - "vpand %%ymm4,%%ymm1,%%ymm1 \n" - "vpsrld $0x6,%%ymm1,%%ymm2 \n" - "vpslld $22,%%ymm1,%%ymm1 \n" - "vpslld $20,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm3,%%ymm3 \n" - "vpor %%ymm2,%%ymm3,%%ymm3 \n" - //green - "vpsrld $0x08,%%ymm0,%%ymm1 \n" - "vpand %%ymm4,%%ymm1,%%ymm1 \n" - "vpsrld $0x6,%%ymm1,%%ymm2 \n" - "vpslld $12,%%ymm1,%%ymm1 \n" - "vpslld $10,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm3,%%ymm3 \n" - "vpor %%ymm2,%%ymm3,%%ymm3 \n" - //blue - "vpand %%ymm4,%%ymm0,%%ymm1 \n" - "vpsrld $0x6,%%ymm1,%%ymm2 \n" - "vpslld $2,%%ymm1,%%ymm1 \n" - "vpor %%ymm1,%%ymm3,%%ymm3 \n" - "vpor %%ymm2,%%ymm3,%%ymm3 \n" + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + // alpha + "vpand %%ymm5,%%ymm0,%%ymm3 \n" + // red + "vpsrld $0x10,%%ymm0,%%ymm1 \n" + "vpand %%ymm4,%%ymm1,%%ymm1 \n" + "vpsrld $0x6,%%ymm1,%%ymm2 \n" + "vpslld $22,%%ymm1,%%ymm1 \n" + "vpslld $20,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm3,%%ymm3 \n" + "vpor %%ymm2,%%ymm3,%%ymm3 \n" + // green + "vpsrld $0x08,%%ymm0,%%ymm1 \n" + "vpand %%ymm4,%%ymm1,%%ymm1 \n" + "vpsrld $0x6,%%ymm1,%%ymm2 \n" + "vpslld $12,%%ymm1,%%ymm1 \n" + "vpslld $10,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm3,%%ymm3 \n" + "vpor %%ymm2,%%ymm3,%%ymm3 \n" + // blue + "vpand %%ymm4,%%ymm0,%%ymm1 \n" + "vpsrld $0x6,%%ymm1,%%ymm2 \n" + "vpslld $2,%%ymm1,%%ymm1 \n" + "vpor %%ymm1,%%ymm3,%%ymm3 \n" + "vpor %%ymm2,%%ymm3,%%ymm3 \n" - "vmovdqu %%ymm3,(%1) \n" - "add $0x20,%0 \n" - "add $0x20,%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - 
"+r"(width) // %2 - :: "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" - ); + "vmovdqu %%ymm3,(%1) \n" + "add $0x20,%0 \n" + "add $0x20,%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif @@ -2851,6 +2850,11 @@ void MergeUVRow_16_AVX2(const uint16* src_u, } #endif // HAS_MERGEUVROW_AVX2 +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 128 = 9 bits +// 64 = 10 bits +// 16 = 12 bits +// 1 = 16 bits #ifdef HAS_MULTIPLYROW_16_AVX2 void MultiplyRow_16_AVX2(const uint16* src_y, uint16* dst_y, @@ -2885,6 +2889,47 @@ void MultiplyRow_16_AVX2(const uint16* src_y, } #endif // HAS_MULTIPLYROW_16_AVX2 +// Use scale to convert lsb formats to msb, depending how many bits there are: +// 32768 = 9 bits +// 16384 = 10 bits +// 4096 = 12 bits +// 256 = 16 bits +#ifdef HAS_CONVERT16TO8ROW_AVX2 +void Convert16To8Row_AVX2(const uint16* src_y, + uint8* dst_y, + int scale, + int width) { + // clang-format off + asm volatile ( + "vmovd %3,%%xmm3 \n" + "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" + "vbroadcastss %%xmm3,%%ymm3 \n" + + // 32 pixels per loop. + LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "add $0x40,%0 \n" + "add $0x20,%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" + : "+r"(src_y), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(scale) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm3"); + // clang-format on +} +#endif // HAS_CONVERT16TO8ROW_AVX2 + #ifdef HAS_SPLITRGBROW_SSSE3 // Shuffle table for converting RGB to Planar. 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ead5919c3..8bcb63d3c 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1963,4 +1963,67 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { } #endif // HAS_ARGBTOAR30ROW_AVX2 +// Alias to copy pixels as is +#define AR30ToAR30 ARGBToARGB + +#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ + ALIGN, YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, \ + BPP_C) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ + const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ + const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ + const int kBpc = 2; \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ + for (int i = 0; i < kWidth * kHeight; ++i) { \ + reinterpret_cast<uint16*>(src_y)[i + OFF] = (fastrand() & 0x3ff); \ + } \ + for (int i = 0; i < kSizeUV; ++i) { \ + reinterpret_cast<uint16*>(src_u)[i + OFF] = (fastrand() & 0x3ff); \ + reinterpret_cast<uint16*>(src_v)[i + OFF] = (fastrand() & 0x3ff); \ + } \ + memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \ + memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(reinterpret_cast<const uint16*>(src_y) + OFF, kWidth, \ + reinterpret_cast<const uint16*>(src_u) + OFF, kStrideUV, \ + reinterpret_cast<const uint16*>(src_v) + OFF, kStrideUV, \ + dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(reinterpret_cast<const uint16*>(src_y) + OFF, kWidth, \ + reinterpret_cast<const uint16*>(src_u) + OFF, kStrideUV, \ 
+ reinterpret_cast<const uint16*>(src_v) + OFF, kStrideUV, \ + dst_argb_opt + OFF, kStrideB, kWidth, \ + NEG kHeight); \ + } \ + int max_diff = 0; \ + for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ + int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \ + static_cast<int>(dst_argb_opt[i])); \ + if (abs_diff > max_diff) { \ + max_diff = abs_diff; \ + } \ + } \ + EXPECT_LE(max_diff, DIFF); \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_u); \ + free_aligned_buffer_page_end(src_v); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } + +#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, DIFF, FMT_C, BPP_C) \ + TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ + YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) + +TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2, AR30, 4) + } // namespace libyuv diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index f9e6f8abb..151bcafd1 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -2661,7 +2661,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { } #endif -// TODO(fbarchard): improve test for platforms and cpu detect +// TODO(fbarchard): Improve test for more platforms. #ifdef HAS_MULTIPLYROW_16_AVX2 TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { const int kPixels = benchmark_width_ * benchmark_height_; @@ -2697,7 +2697,48 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { free_aligned_buffer_page_end(dst_pixels_y_opt); free_aligned_buffer_page_end(dst_pixels_y_c); } -#endif +#endif // HAS_MULTIPLYROW_16_AVX2 + +// TODO(fbarchard): Improve test for more platforms. 
+#ifdef HAS_CONVERT16TO8ROW_AVX2 +TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + // C code does not clamp so limit source range to 10 bits. + for (int i = 0; i < kPixels; ++i) { + reinterpret_cast<uint16*>(src_pixels_y)[i] &= 1023; + } + + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y), + dst_pixels_y_c, 16384, kPixels); + + int has_avx2 = TestCpuFlag(kCpuHasAVX2); + for (int i = 0; i < benchmark_iterations_; ++i) { + if (has_avx2) { + Convert16To8Row_AVX2(reinterpret_cast<const uint16*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } else { + Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y), + dst_pixels_y_opt, 16384, kPixels); + } + } + + for (int i = 0; i < kPixels; ++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} +#endif // HAS_CONVERT16TO8ROW_AVX2 float TestScaleMaxSamples(int benchmark_width, int benchmark_height, diff --git a/unit_test/video_common_test.cc b/unit_test/video_common_test.cc index 424d79986..ba7b15a9d 100644 --- a/unit_test/video_common_test.cc +++ b/unit_test/video_common_test.cc @@ -80,6 +80,8 @@ TEST_F(LibYUVBaseTest, TestFourCC) { EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); 
EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));