diff --git a/README.chromium b/README.chromium index 7bf12e325..7125848f1 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1682 +Version: 1683 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index f096d193a..c1574eb1b 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -77,6 +77,42 @@ int I420Copy(const uint8* src_y, int width, int height); +// Copy I010 to I010 +#define I010ToI010 I010Copy +#define H010ToH010 I010Copy +LIBYUV_API +int I010Copy(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert 10 bit YUV to 8 bit +LIBYUV_API +int I010ToI420(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_y, + int dst_stride_y, + uint8* dst_u, + int dst_stride_u, + uint8* dst_v, + int dst_stride_v, + int width, + int height); + // Convert I400 (grey) to I420. LIBYUV_API int I400ToI420(const uint8* src_y, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index c91501a9c..88b651ddc 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -39,6 +39,15 @@ void CopyPlane_16(const uint16* src_y, int width, int height); +LIBYUV_API +void Convert16To8Plane(const uint16* src_y, + int src_stride_y, + uint8* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height); + // Set a plane of data to a 32 bit value. LIBYUV_API void SetPlane(uint8* dst_y, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index ce052dde3..39e6cc4d5 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1682 +#define LIBYUV_VERSION 1683 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index ef78fb5f5..8ef06afc3 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -106,6 +106,92 @@ int I420Copy(const uint8* src_y, return 0; } +// Copy I010 with optional flipping +LIBYUV_API +int I010Copy(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint16* dst_y, + int dst_stride_y, + uint16* dst_u, + int dst_stride_u, + uint16* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + // Copy UV planes. 
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); + return 0; +} + +// Convert 10 bit YUV to 8 bit +LIBYUV_API +int I010ToI420(const uint16* src_y, + int src_stride_y, + const uint16* src_u, + int src_stride_u, + const uint16* src_v, + int src_stride_v, + uint8* dst_y, + int dst_stride_y, + uint8* dst_u, + int dst_stride_u, + uint8* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width, + height); + // Convert UV planes. + Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth, + halfheight); + Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth, + halfheight); + return 0; +} + // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API diff --git a/source/convert_from.cc b/source/convert_from.cc index 5c8037530..e57bb4bf4 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -1240,8 +1240,8 @@ int ConvertFromI420(const uint8* y, break; case FOURCC_RGBP: r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); + dst_sample_stride ? dst_sample_stride : width * 2, width, + height); break; case FOURCC_RGBO: r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample, @@ -1255,8 +1255,8 @@ int ConvertFromI420(const uint8* y, break; case FOURCC_24BG: r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample, - dst_sample_stride ? dst_sample_stride : width * 3, - width, height); + dst_sample_stride ? dst_sample_stride : width * 3, width, + height); break; case FOURCC_RAW: r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index e65f17887..8127fe3a2 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -50,6 +50,7 @@ void CopyPlane(const uint8* src_y, if (src_y == dst_y && src_stride_y == dst_stride_y) { return; } + #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; @@ -120,6 +121,56 @@ void CopyPlane_16(const uint16* src_y, } } +// Convert a plane of 16 bit data to 8 bit +LIBYUV_API +void Convert16To8Plane(const uint16* src_y, + int src_stride_y, + uint8* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height) { + int y; + void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale, + int width) = Convert16To8Row_C; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + // Coalesce rows. 
+ if (src_stride_y == width && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_y = dst_stride_y = 0; + } +#if defined(HAS_CONVERT16TO8ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + Convert16To8Row = Convert16To8Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + Convert16To8Row = Convert16To8Row_SSSE3; + } + } +#endif +#if defined(HAS_CONVERT16TO8ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + Convert16To8Row = Convert16To8Row_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + Convert16To8Row = Convert16To8Row_AVX2; + } + } +#endif + + // Copy plane + for (y = 0; y < height; ++y) { + Convert16To8Row(src_y, dst_y, scale, width); + src_y += src_stride_y; + dst_y += dst_stride_y; + } +} + // Copy I422. LIBYUV_API int I422Copy(const uint8* src_y, diff --git a/source/row_common.cc b/source/row_common.cc index 3263142b7..d69634908 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1856,7 +1856,7 @@ void Convert16To8Row_C(const uint16* src_y, int width) { int x; for (x = 0; x < width; ++x) { - dst_y[x] = (src_y[x] * scale) >> 16; + dst_y[x] = clamp255((src_y[x] * scale) >> 16); } } diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 10781cba8..d322c7b8b 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2956,20 +2956,20 @@ void Convert16To8Row_SSSE3(const uint16* src_y, int width) { // clang-format off asm volatile ( - "movd %3,%%xmm3 \n" - "punpcklwd %%xmm3,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" + "movd %3,%%xmm2 \n" + "punpcklwd %%xmm2,%%xmm2 \n" + "pshufd $0x0,%%xmm2,%%xmm2 \n" // 32 pixels per loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" - "pmulhuw %%xmm3,%%xmm0 \n" - "pmulhuw %%xmm3,%%xmm1 \n" + "add $0x20,%0 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm2,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" - "add $0x20,%0 \n" "add $0x10,%1 \n" "sub $0x10,%2 \n" "jg 1b \n" @@ -2977,7 +2977,7 @@ void Convert16To8Row_SSSE3(const uint16* src_y, "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm3"); + : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } @@ -2988,22 +2988,21 @@ void Convert16To8Row_AVX2(const uint16* src_y, int width) { // clang-format off asm volatile ( - "vmovd %3,%%xmm3 \n" - "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" - "vbroadcastss %%xmm3,%%ymm3 \n" + "vmovd %3,%%xmm2 \n" + "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" + "vbroadcastss %%xmm2,%%ymm2 \n" // 32 pixels per loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" - "vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - + "add $0x40,%0 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" - "add $0x40,%0 \n" "add $0x20,%1 \n" "sub $0x20,%2 \n" "jg 1b \n" @@ -3012,7 +3011,7 @@ void Convert16To8Row_AVX2(const uint16* src_y, "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm3"); + : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } #endif // HAS_MULTIPLYROW_16_AVX2 diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index ec04530f9..b246064d5 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> #include <stdlib.h> #include <time.h> @@ -35,98 +36,71 @@ namespace libyuv { #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ +// Planar test + +#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ + assert(SRC_BPC == 1 || SRC_BPC == 2); \ + assert(DST_BPC == 1 || DST_BPC == 2); \ + assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2); \ + assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2); \ + assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2); \ + assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2); \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kHeight = benchmark_height_; \ - align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ - align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ - OFF); \ - align_buffer_page_end(dst_y_c, kWidth* kHeight); \ - align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ - align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (fastrand() & 0xff); \ - } \ - } \ - memset(dst_y_c, 1, kWidth* kHeight); \ - memset(dst_u_c, 2, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 3, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth* kHeight); \ - memset(dst_u_opt, 102, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 103, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ + const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ + const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ + const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ + const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_u, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(src_v, \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ +
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ + MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \ + reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ + NEG kHeight); \ MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ + reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \ + reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \ + reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \ + reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ + reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ + reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ + NEG kHeight); \ } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ - static_cast<int>(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ - EXPECT_EQ(0, max_diff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast<int>( \ - dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ + for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ } \ - EXPECT_LE(max_diff, 3); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = abs( \ - static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast<int>( \ - dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_u_c); \ free_aligned_buffer_page_end(dst_v_c); \ @@ -138,25 +112,32 @@ namespace libyuv { free_aligned_buffer_page_end(src_v); \ } -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ -
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ - SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) +#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ + SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ + DST_SUBSAMP_X, DST_SUBSAMP_Y) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_ - 4, _Any, +, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Unaligned, +, 1) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Invert, -, 0) \ + TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ + FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \ + benchmark_width_, _Opt, +, 0) -TESTPLANARTOP(I420, 2, 2, I420, 2, 2) -TESTPLANARTOP(I422, 2, 1, I420, 2, 2) -TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I420, 2, 2, I422, 2, 1) -TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) -TESTPLANARTOP(I422, 2, 1, I422, 2, 1) -TESTPLANARTOP(I444, 1, 1, I444, 1, 1) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1) +TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2) +TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1) +TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1) +TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2) +TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2) // Test Android 420 to I420 #define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 6e1c27cad..02a9d5272 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -2699,6 +2699,37 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { } #endif // HAS_MULTIPLYROW_16_AVX2 +TEST_F(LibYUVPlanarTest, Convert16To8Plane) { + const int kPixels = benchmark_width_ * benchmark_height_; + align_buffer_page_end(src_pixels_y, kPixels * 2); + align_buffer_page_end(dst_pixels_y_opt, kPixels); + align_buffer_page_end(dst_pixels_y_c, kPixels); + + MemRandomize(src_pixels_y, kPixels * 2); + memset(dst_pixels_y_opt, 0, kPixels); + memset(dst_pixels_y_c, 1, kPixels); + + MaskCpuFlags(disable_cpu_flags_); + Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y), + benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384, + benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info_); + + for (int i = 0; i < benchmark_iterations_; ++i) { + Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y), + benchmark_width_, dst_pixels_y_opt, benchmark_width_, + 16384, benchmark_width_, benchmark_height_); + } + + for (int i = 0; i < kPixels;
++i) { + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + } + + free_aligned_buffer_page_end(src_pixels_y); + free_aligned_buffer_page_end(dst_pixels_y_opt); + free_aligned_buffer_page_end(dst_pixels_y_c); +} + // TODO(fbarchard): Improve test for more platforms. #ifdef HAS_CONVERT16TO8ROW_AVX2 TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index 1ad4dece8..c2d7a1db7 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -19,10 +19,6 @@ #endif #include "libyuv/cpu_id.h" -// Change this to 1000 for benchmarking. -// TODO(fbarchard): Add command line parsing to pass this as option. -#define BENCHMARK_ITERATIONS 1 - unsigned int fastrand_seed = 0xfb; #ifdef LIBYUV_USE_GFLAGS @@ -47,7 +43,7 @@ static const int32 FLAGS_libyuv_cpu_info = 0; // Set flags to -1 for benchmarking to avoid slower C code. LibYUVConvertTest::LibYUVConvertTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -92,12 +88,6 @@ LibYUVConvertTest::LibYUVConvertTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -107,7 +97,7 @@ LibYUVConvertTest::LibYUVConvertTest() } LibYUVColorTest::LibYUVColorTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -152,12 +142,6 @@ LibYUVColorTest::LibYUVColorTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -167,7 +151,7 @@ LibYUVColorTest::LibYUVColorTest() } LibYUVScaleTest::LibYUVScaleTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -212,12 +196,6 @@ LibYUVScaleTest::LibYUVScaleTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -227,7 +205,7 @@ LibYUVScaleTest::LibYUVScaleTest() } LibYUVRotateTest::LibYUVRotateTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -272,12 +250,6 @@ LibYUVRotateTest::LibYUVRotateTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * @@ -287,7 +259,7 @@ LibYUVRotateTest::LibYUVRotateTest() } LibYUVPlanarTest::LibYUVPlanarTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -332,12 +304,6 @@ LibYUVPlanarTest::LibYUVPlanarTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -347,7 +313,7 @@ LibYUVPlanarTest::LibYUVPlanarTest() } LibYUVBaseTest::LibYUVBaseTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -392,12 +358,6 @@ LibYUVBaseTest::LibYUVBaseTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) * @@ -407,7 +367,7 @@ LibYUVBaseTest::LibYUVBaseTest() } LibYUVCompareTest::LibYUVCompareTest() - : benchmark_iterations_(BENCHMARK_ITERATIONS), + : benchmark_iterations_(1), benchmark_width_(128), benchmark_height_(72), disable_cpu_flags_(1), @@ -452,12 +412,6 @@ LibYUVCompareTest::LibYUVCompareTest() benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; } libyuv::MaskCpuFlags(benchmark_cpu_info_); - benchmark_pixels_div256_ = - static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * - static_cast<double>(Abs(benchmark_height_)) * - static_cast<double>(benchmark_iterations_) + - 255.0) / - 256.0); benchmark_pixels_div1280_ = static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<double>(Abs(benchmark_height_)) *
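
Note (reviewer addition, not part of the patch): a minimal caller-side sketch of the new I010ToI420 entry point declared in include/libyuv/convert.h above. The wrapper name, buffers, and tightly-packed strides are illustrative assumptions; only the I010ToI420 signature and its 0-on-success return come from this change. Internally each 10-bit sample is scaled as (value * 16384) >> 16 with a clamp, i.e. a shift right by 2, so 1023 maps to 255.

#include "libyuv/convert.h"

// Illustrative wrapper (hypothetical, not part of libyuv): converts one I010
// frame, stored as uint16 samples holding 10-bit values, to 8-bit I420.
// Strides are in samples and assume tightly packed planes.
bool ConvertTenBitFrame(const uint16* src_y, const uint16* src_u,
                        const uint16* src_v, uint8* dst_y, uint8* dst_u,
                        uint8* dst_v, int width, int height) {
  const int halfwidth = (width + 1) / 2;  // 4:2:0 chroma planes are half width.
  return libyuv::I010ToI420(src_y, width, src_u, halfwidth, src_v, halfwidth,
                            dst_y, width, dst_u, halfwidth, dst_v, halfwidth,
                            width, height) == 0;  // 0 indicates success.
}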