Add support for MM21.

Add support for MM21 to NV12 and I420 conversion, and add SIMD
optimizations for arm, aarch64, SSE2, and SSSE3 machines.

Bug: libyuv:915, b/215425056
Change-Id: Iecb0c33287f35766a6169d4adf3b7397f1ba8b5d
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3433269
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Justin Green <greenjustin@google.com>
This commit is contained in:
Justin Green 2022-02-03 11:46:44 -05:00 committed by libyuv LUCI CQ
parent 804980bbab
commit b4ddbaf549
13 changed files with 628 additions and 113 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1810 Version: 1811
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -106,6 +106,34 @@ int I422ToI444(const uint8_t* src_y,
int width, int width,
int height); int height);
// Convert MM21 to NV12.
// Detiles the MM21 luma plane (tile height 32) and the interleaved chroma
// plane (tile height 16) into linear NV12 planes.
// dst_y may be NULL to convert only the chroma plane.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int MM21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height);

// Convert MM21 to I420.
// Detiles the luma plane as above and additionally splits the tiled
// interleaved UV plane into separate linear U and V planes.
// dst_y may be NULL to convert only the chroma planes.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int MM21ToI420(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height);
// Convert I422 to NV21. // Convert I422 to NV21.
LIBYUV_API LIBYUV_API
int I422ToNV21(const uint8_t* src_y, int I422ToNV21(const uint8_t* src_y,

View File

@ -93,6 +93,18 @@ void DetilePlane(const uint8_t* src_y,
int height, int height,
int tile_height); int tile_height);
// Convert a UV plane of tiles of 16 x H into linear U and V planes.
// Tiles are 16 bytes wide and tile_height rows tall; width and height are
// the dimensions of the interleaved UV plane in bytes and rows.
// A negative height inverts the output planes vertically.
LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
                        int src_stride_uv,
                        uint8_t* dst_u,
                        int dst_stride_u,
                        uint8_t* dst_v,
                        int dst_stride_v,
                        int width,
                        int height,
                        int tile_height);
// Split interleaved UV plane into separate U and V planes. // Split interleaved UV plane into separate U and V planes.
LIBYUV_API LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv, void SplitUVPlane(const uint8_t* src_uv,

View File

@ -290,6 +290,8 @@ extern "C" {
#define HAS_AB64TOARGBROW_SSSE3 #define HAS_AB64TOARGBROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2 #define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
#define HAS_DETILESPLITUVROW_SSSE3
#define HAS_HALFMERGEUVROW_SSSE3 #define HAS_HALFMERGEUVROW_SSSE3
#define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3
@ -537,6 +539,7 @@ extern "C" {
#define HAS_GAUSSROW_F32_NEON #define HAS_GAUSSROW_F32_NEON
#define HAS_GAUSSCOL_F32_NEON #define HAS_GAUSSCOL_F32_NEON
#define HAS_DETILEROW_NEON #define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_ABGRTOUVROW_MSA #define HAS_ABGRTOUVROW_MSA
@ -1839,6 +1842,43 @@ void DetileRow_NEON(const uint8_t* src,
ptrdiff_t src_tile_stride, ptrdiff_t src_tile_stride,
uint8_t* dst, uint8_t* dst,
int width); int width);
// Detile row functions: copy a row's worth of 16-byte tile chunks
// (consecutive chunks are src_tile_stride bytes apart in the tiled source)
// into a linear destination. The _Any variants handle widths that are not
// a multiple of 16.
void DetileRow_Any_NEON(const uint8_t* src,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst,
                        int width);
void DetileRow_SSE2(const uint8_t* src,
                    ptrdiff_t src_tile_stride,
                    uint8_t* dst,
                    int width);
void DetileRow_Any_SSE2(const uint8_t* src,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst,
                        int width);
// DetileSplitUV row functions: additionally deinterleave each 16-byte UV
// chunk into 8 bytes of U and 8 bytes of V. width is in bytes of UV.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width);
void DetileSplitUVRow_SSSE3(const uint8_t* src_uv,
                            ptrdiff_t src_tile_stride,
                            uint8_t* dst_u,
                            uint8_t* dst_v,
                            int width);
void DetileSplitUVRow_Any_SSSE3(const uint8_t* src_uv,
                                ptrdiff_t src_tile_stride,
                                uint8_t* dst_u,
                                uint8_t* dst_v,
                                int width);
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                           ptrdiff_t src_tile_stride,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width);
void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv,
                               ptrdiff_t src_tile_stride,
                               uint8_t* dst_u,
                               uint8_t* dst_v,
                               int width);
void MergeUVRow_C(const uint8_t* src_u, void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,

View File

@ -564,6 +564,60 @@ int I422ToNV21(const uint8_t* src_y,
return 0; return 0;
} }
// Convert MM21 to NV12.
// Detiles the MM21 luma plane (tile height 32) and the interleaved chroma
// plane (tile height 16) into linear NV12 planes.
// dst_y may be NULL to convert only the chroma plane.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int MM21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int sign;
  if (!src_uv || !dst_uv || width <= 0) {
    return -1;
  }
  // The luma plane is optional, but when a destination is supplied the
  // matching source must be too; otherwise DetilePlane would dereference
  // NULL.
  if (dst_y && !src_y) {
    return -1;
  }
  sign = height < 0 ? -1 : 1;
  if (dst_y) {
    // MM21 luma tiles are 16 bytes wide x 32 rows.
    DetilePlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height, 32);
  }
  // MM21 chroma tiles are 16 bytes wide x 16 rows. Width is rounded up to
  // even (full UV pairs); height is halved, rounded away from zero so the
  // sign (inversion) is preserved.
  DetilePlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, (width + 1) & ~1,
              (height + sign) / 2, 16);
  return 0;
}
// Convert MM21 to I420.
// Detiles the luma plane (tile height 32) and splits the tiled interleaved
// UV plane (tile height 16) into separate linear U and V planes.
// dst_y may be NULL to convert only the chroma planes.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int MM21ToI420(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_uv,
               int src_stride_uv,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_u,
               int dst_stride_u,
               uint8_t* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int sign;
  // Validate before any derived computation, matching MM21ToNV12.
  if (!src_uv || !dst_u || !dst_v || width <= 0) {
    return -1;
  }
  // Luma is optional, but a luma destination requires a luma source.
  if (dst_y && !src_y) {
    return -1;
  }
  sign = height < 0 ? -1 : 1;
  if (dst_y) {
    // MM21 luma tiles are 16 bytes wide x 32 rows.
    DetilePlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height, 32);
  }
  // Chroma tiles are 16 bytes wide x 16 rows; width rounded up to even,
  // height halved with the sign preserved for inversion.
  DetileSplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
                     dst_stride_v, (width + 1) & ~1, (height + sign) / 2, 16);
  return 0;
}
#ifdef I422TONV21_ROW_VERSION #ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version. // Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height // 422 chroma is 1/2 width, 1x height

View File

@ -882,9 +882,20 @@ void DetilePlane(const uint8_t* src_y,
dst_stride_y = -dst_stride_y; dst_stride_y = -dst_stride_y;
} }
#if defined(HAS_DETILEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
DetileRow = DetileRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
DetileRow = DetileRow_SSE2;
}
}
#endif
#if defined(HAS_DETILEROW_NEON) #if defined(HAS_DETILEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
DetileRow = DetileRow_NEON; DetileRow = DetileRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
DetileRow = DetileRow_NEON;
}
} }
#endif #endif
@ -900,6 +911,64 @@ void DetilePlane(const uint8_t* src_y,
} }
} }
// Convert a tiled interleaved UV plane (tiles 16 bytes x tile_height rows)
// into separate linear U and V planes. A negative height inverts the
// output planes vertically.
LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
                        int src_stride_uv,
                        uint8_t* dst_u,
                        int dst_stride_u,
                        uint8_t* dst_v,
                        int dst_stride_v,
                        int width,
                        int height,
                        int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride,
                           uint8_t* dst_u, uint8_t* dst_v, int width) =
      DetileSplitUVRow_C;
  assert(tile_height > 0);
  // src_stride_uv > 0 subsumes the former redundant >= 0 assert.
  assert(src_stride_uv > 0);
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_stride_u = -dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_v = -dst_stride_v;
  }
#if defined(HAS_DETILESPLITUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_SSSE3;
    }
  }
#endif
// Fixed macro name: was HAS_DETILESPLITROW_NEON (missing "UV"), which
// meant the NEON row function was never selected.
#if defined(HAS_DETILESPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_NEON;
    }
  }
#endif
  // Detile plane: each output row consumes 16 source bytes; on tile-row
  // boundaries, jump back to the start of the next row of tiles.
  for (y = 0; y < height; ++y) {
    DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height;
    }
  }
}
// Support function for NV12 etc RGB channels. // Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width). // Width and height are plane sizes (typically half pixel width).
LIBYUV_API LIBYUV_API

View File

@ -2059,6 +2059,51 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
#endif #endif
#undef ANY11S #undef ANY11S
// Wrap a SIMD detile row function so it handles any width: the SIMD kernel
// processes the multiple-of-(MASK+1) prefix directly, then the remainder
// (r bytes of the next tile chunk) is staged through a small aligned temp
// buffer — detiled into temp+16 and copied out. Note the remainder path
// runs even when r == 0; the zero-length memcpys make that harmless.
#define ANYDETILE(NAMEANY, ANY_SIMD, MASK)                                   \
  void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,  \
               int width) {                                                  \
    SIMD_ALIGNED(uint8_t temp[16 * 2]);                                      \
    memset(temp, 0, 16); /* for msan */                                      \
    int r = width & MASK;                                                    \
    int n = width & ~MASK;                                                   \
    if (n > 0) {                                                             \
      ANY_SIMD(src, src_tile_stride, dst, n);                                \
    }                                                                        \
    memcpy(temp, src + (n / 16) * src_tile_stride, r);                       \
    ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1);                    \
    memcpy(dst + n, temp + 16, r);                                           \
  }

#ifdef HAS_DETILEROW_NEON
ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15)
#endif
#ifdef HAS_DETILEROW_SSE2
ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15)
#endif
// Wrap a SIMD detile-split-UV row function for any width. The aligned
// prefix is split directly into dst_u/dst_v; the remaining r bytes of UV
// are staged in temp, split into temp+16 (U) and temp+24 (V), and then
// (r+1)/2 bytes of each are copied to the destinations.
#define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK)                   \
  void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride,    \
               uint8_t* dst_u, uint8_t* dst_v, int width) {         \
    SIMD_ALIGNED(uint8_t temp[16 * 2]);                             \
    memset(temp, 0, 16 * 2); /* for msan */                         \
    int r = width & MASK;                                           \
    int n = width & ~MASK;                                          \
    if (n > 0) {                                                    \
      ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n);           \
    }                                                               \
    memcpy(temp, src_uv + (n / 16) * src_tile_stride, r);           \
    ANY_SIMD(temp, src_tile_stride, temp + 16, temp + 24, r);       \
    memcpy(dst_u + n / 2, temp + 16, (r + 1) / 2);                  \
    memcpy(dst_v + n / 2, temp + 24, (r + 1) / 2);                  \
  }

#ifdef HAS_DETILESPLITUVROW_NEON
ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
#endif
#ifdef HAS_DETILESPLITUVROW_SSSE3
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
#endif
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv

View File

@ -2674,6 +2674,30 @@ void DetileRow_C(const uint8_t* src,
} }
} }
// Scalar reference: read 16-byte interleaved UV chunks from a tiled source
// (consecutive chunks are src_tile_stride bytes apart) and split them into
// 8 bytes of U and 8 bytes of V each. A trailing partial chunk of
// (width & 15) bytes is split pairwise from the current position.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int remaining = width;
  // Full 16-byte chunks: 8 UV pairs each, then hop to the next tile.
  while (remaining >= 16) {
    int i;
    for (i = 0; i < 16; i += 2) {
      *dst_u++ = src_uv[i];
      *dst_v++ = src_uv[i + 1];
    }
    src_uv += src_tile_stride;
    remaining -= 16;
  }
  // Leftover pairs (odd trailing byte, if any, is ignored).
  remaining /= 2;
  while (remaining-- > 0) {
    *dst_u++ = *src_uv++;
    *dst_v++ = *src_uv++;
  }
}
void SplitUVRow_C(const uint8_t* src_uv, void SplitUVRow_C(const uint8_t* src_uv,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,

View File

@ -9,7 +9,6 @@
*/ */
#include "libyuv/row.h" #include "libyuv/row.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
@ -4765,6 +4764,63 @@ void SplitUVRow_SSE2(const uint8_t* src_uv,
} }
#endif // HAS_SPLITUVROW_SSE2 #endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_DETILEROW_SSE2
// Copy a row of 16-byte tile chunks into a linear destination.
// Each iteration loads 16 bytes, advances the source by src_tile_stride
// (to the same row of the next tile), and stores 16 bytes contiguously.
// The sub sets the flags tested by jg; lea and movdqu do not alter flags.
// width is assumed to be a multiple of 16; DetileRow_Any_SSE2 handles
// remainders.
void DetileRow_SSE2(const uint8_t* src,
                    ptrdiff_t src_tile_stride,
                    uint8_t* dst,
                    int width) {
  asm volatile(
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "sub $0x10,%2 \n"
      "lea (%0,%3),%0 \n"
      "movdqu %%xmm0,(%1) \n"
      "lea 0x10(%1),%1 \n"
      "jg 1b \n"
      : "+r"(src),            // %0
        "+r"(dst),            // %1
        "+r"(width)           // %2
      : "r"(src_tile_stride)  // %3
      : "cc", "memory", "xmm0");
}
#endif  // HAS_DETILEROW_SSE2
#ifdef HAS_DETILESPLITUVROW_SSSE3
// TODO(greenjustin): Look into generating these constants instead of loading
// them since this can cause branch mispredicts for fPIC code on 32-bit
// machines.
// pshufb control: gathers even-indexed bytes (U) into the low 8 lanes and
// odd-indexed bytes (V) into the high 8 lanes of the register.
static const uvec8 kDeinterlaceUV = {0, 2, 4, 6, 8, 10, 12, 14,
                                     1, 3, 5, 7, 9, 11, 13, 15};

// TODO(greenjustin): Research alternatives to pshufb, since pshufb can be very
// slow on older SSE2 processors.
// Read 16 bytes of interleaved UV per iteration (advancing by
// src_tile_stride to the next tile), deinterleave with pshufb, then movq
// stores the low 8 bytes (U) and movhps the high 8 bytes (V).
// width is in bytes of UV; DetileSplitUVRow_Any_SSSE3 handles remainders.
void DetileSplitUVRow_SSSE3(const uint8_t* src_uv,
                            ptrdiff_t src_tile_stride,
                            uint8_t* dst_u,
                            uint8_t* dst_v,
                            int width) {
  asm volatile(
      "movdqu %4,%%xmm1 \n"
      "1: \n"
      "movdqu (%0),%%xmm0 \n"
      "lea (%0, %5),%0 \n"
      "pshufb %%xmm1,%%xmm0 \n"
      "movq %%xmm0,(%1) \n"
      "lea 0x8(%1),%1 \n"
      "movhps %%xmm0,(%2) \n"
      "lea 0x8(%2),%2 \n"
      "sub $0x10,%3 \n"
      "jg 1b \n"
      : "+r"(src_uv),          // %0
        "+r"(dst_u),           // %1
        "+r"(dst_v),           // %2
        "+r"(width)            // %3
      : "m"(kDeinterlaceUV),   // %4
        "r"(src_tile_stride)   // %5
      : "cc", "memory", "xmm0", "xmm1");
}
#endif  // HAS_DETILESPLITUVROW_SSSE3
#ifdef HAS_MERGEUVROW_AVX2 #ifdef HAS_MERGEUVROW_AVX2
void MergeUVRow_AVX2(const uint8_t* src_u, void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,

View File

@ -575,6 +575,52 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
); );
} }
// Reads 16 byte Y's from tile and writes out 16 Y's.
// MM21 Y tiles are 16x32 so src_tile_stride = 512 bytes
// MM21 UV tiles are 8x16 so src_tile_stride = 256 bytes
// width measured in bytes so 8 UV = 16.
// Each iteration: load 16 bytes, advance src by src_tile_stride to the
// next tile, prefetch ahead, store 16 bytes linearly.
void DetileRow_NEON(const uint8_t* src,
                    ptrdiff_t src_tile_stride,
                    uint8_t* dst,
                    int width) {
  asm volatile(
      "1: \n"
      "vld1.16 {q0}, [%0], %3 \n"  // load 16 bytes
      "subs %2, %2, #16 \n"        // 16 processed per loop
      "pld [%0, 1792] \n"
      "vst1.16 {q0}, [%1]! \n"  // store 16 bytes
      "bgt 1b \n"
      : "+r"(src),            // %0
        "+r"(dst),            // %1
        "+r"(width)           // %2
      : "r"(src_tile_stride)  // %3
      : "cc", "memory", "q0"  // Clobber List
  );
}
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
// vld2.8 deinterleaves on load (d0 = U bytes, d1 = V bytes); the source
// pointer advances by src_tile_stride to reach the next tile.
// width is in bytes of interleaved UV (16 per iteration).
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                           ptrdiff_t src_tile_stride,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width) {
  asm volatile(
      "1: \n"
      "vld2.8 {d0, d1}, [%0], %4 \n"
      "subs %3, %3, #16 \n"
      "pld [%0, 1792] \n"
      "vst1.8 {d0}, [%1]! \n"
      "vst1.8 {d1}, [%2]! \n"
      "bgt 1b \n"
      : "+r"(src_uv),               // %0
        "+r"(dst_u),                // %1
        "+r"(dst_v),                // %2
        "+r"(width)                 // %3
      : "r"(src_tile_stride)        // %4
      : "cc", "memory", "d0", "d1"  // Clobber List
  );
}
// Reads 16 U's and V's and writes out 16 pairs of UV. // Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u, void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,

View File

@ -627,6 +627,29 @@ void DetileRow_NEON(const uint8_t* src,
); );
} }
// Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
// ld2 deinterleaves on load (v0 = U bytes, v1 = V bytes); the source
// pointer advances by src_tile_stride to reach the next tile.
// width is in bytes of interleaved UV (16 per iteration).
void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                           ptrdiff_t src_tile_stride,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width) {
  asm volatile(
      "1: \n"
      "ld2 {v0.8b,v1.8b}, [%0], %4 \n"
      "subs %w3, %w3, #16 \n"
      "prfm pldl1keep, [%0, 1792] \n"
      "st1 {v0.8b}, [%1], #8 \n"
      "st1 {v1.8b}, [%2], #8 \n"
      "b.gt 1b \n"
      : "+r"(src_uv),               // %0
        "+r"(dst_u),                // %1
        "+r"(dst_v),                // %2
        "+r"(width)                 // %3
      : "r"(src_tile_stride)        // %4
      : "cc", "memory", "v0", "v1"  // Clobber List
  );
}
#if LIBYUV_USE_ST2 #if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV. // Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u, void MergeUVRow_NEON(const uint8_t* src_u,

View File

@ -419,7 +419,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ #define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY, SRC_DEPTH) \ DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@ -433,13 +433,18 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
"DST_SUBSAMP_Y unsupported"); \ "DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
align_buffer_page_end(src_uv, \ const int kPaddedHeight = \
2 * kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_c, \ align_buffer_page_end(dst_uv_c, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
@ -448,11 +453,11 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \ for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
src_y_p[i] = \ src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \ } \
for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight * 2; ++i) { \ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
src_uv_p[i] = \ src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \ } \
@ -497,136 +502,148 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ #define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
SRC_DEPTH) \ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
SRC_DEPTH) \ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
SRC_DEPTH) \ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH) \ DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \ DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
SRC_DEPTH) SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8) TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8) TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8) TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8) TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10) TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10) TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10) TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12) TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12) TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12) TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \ DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
SRC_DEPTH) \ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \ static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \ static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \ static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \ "SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \ static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \ "SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \ static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \ "DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \ static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \ "DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \ const int kWidth = W1280; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ const int kPaddedHeight = \
align_buffer_page_end(src_uv, \ (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC * 2 + OFF); \ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ align_buffer_page_end( \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \ align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
for (int i = 0; i < kWidth * kHeight; ++i) { \ align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
src_y_p[i] = \ align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
} \ SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kSrcHalfWidth * kSrcHalfHeight * 2; ++i) { \ for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
src_uv_p[i] = \ src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \ } \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ src_uv_p[i] = \
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ } \
memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \ memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
reinterpret_cast<DST_T*>(dst_y_c), kWidth, \ memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \ MaskCpuFlags(disable_cpu_flags_); \
reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
NEG kHeight); \ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
MaskCpuFlags(benchmark_cpu_info_); \ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
for (int i = 0; i < benchmark_iterations_; ++i) { \ reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ NEG kHeight); \
reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \ MaskCpuFlags(benchmark_cpu_info_); \
reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \ for (int i = 0; i < benchmark_iterations_; ++i) { \
reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
NEG kHeight); \ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
} \ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
} \ NEG kHeight); \
for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ } \
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \ } \
free_aligned_buffer_page_end(dst_y_c); \ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
free_aligned_buffer_page_end(dst_u_c); \ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
free_aligned_buffer_page_end(dst_v_c); \ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
free_aligned_buffer_page_end(dst_y_opt); \ } \
free_aligned_buffer_page_end(dst_u_opt); \ free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_v_opt); \ free_aligned_buffer_page_end(dst_u_c); \
free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(dst_v_c); \
free_aligned_buffer_page_end(src_uv); \ free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_u_opt); \
free_aligned_buffer_page_end(dst_v_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
} }
#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ #define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH) \ DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH) \ DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \ DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
SRC_DEPTH) \ SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH) \ DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH) DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT)
TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8) TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
// Provide matrix wrappers for full range bt.709 // Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ #define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \

101
unit_test/planar_test.cc Executable file → Normal file
View File

@ -1523,6 +1523,107 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
free_aligned_buffer_page_end(dst_opt); free_aligned_buffer_page_end(dst_opt);
} }
// Verifies that the SIMD-optimized DetileSplitUVPlane produces output
// identical to the portable C implementation, while also serving as the
// benchmark loop for the optimized path.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  // The tiled source is padded out to whole 16x16 tiles.
  const int tiled_width = (benchmark_width_ + 15) & ~15;
  const int tiled_height = (benchmark_height_ + 15) & ~15;
  const int tiled_size = tiled_width * tiled_height;
  // U and V destination planes: half-width samples, full height each
  // (allocated generously at full width, matching the reference test).
  const int plane_size = benchmark_width_ * benchmark_height_;
  const int half_stride = (benchmark_width_ + 1) / 2;

  align_buffer_page_end(orig_uv, tiled_size);
  align_buffer_page_end(dst_u_c, plane_size);
  align_buffer_page_end(dst_u_opt, plane_size);
  align_buffer_page_end(dst_v_c, plane_size);
  align_buffer_page_end(dst_v_opt, plane_size);

  MemRandomize(orig_uv, tiled_size);
  memset(dst_u_c, 0, plane_size);
  memset(dst_u_opt, 0, plane_size);
  memset(dst_v_c, 0, plane_size);
  memset(dst_v_opt, 0, plane_size);

  // Reference pass: force the portable C code paths.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetileSplitUVPlane(orig_uv, tiled_width, dst_u_c, half_stride, dst_v_c,
                       half_stride, benchmark_width_, benchmark_height_, 16);
  }

  // Timed pass: allow whatever SIMD the benchmark config enables.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetileSplitUVPlane(orig_uv, tiled_width, dst_u_opt, half_stride, dst_v_opt,
                       half_stride, benchmark_width_, benchmark_height_, 16);
  }

  // Optimized output must match the C reference byte for byte.
  for (int idx = 0; idx < plane_size; ++idx) {
    EXPECT_EQ(dst_u_c[idx], dst_u_opt[idx]);
  }
  for (int idx = 0; idx < plane_size; ++idx) {
    EXPECT_EQ(dst_v_c[idx], dst_v_opt[idx]);
  }

  free_aligned_buffer_page_end(orig_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
// Cross-checks the one-shot DetileSplitUVPlane against the equivalent
// two-stage pipeline (DetilePlane to linearize, then SplitUVPlane to
// deinterleave); both must produce identical U and V planes.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
  int i, j;
  // orig is tiled. Allocate enough memory for whole 16x16 tiles.
  int orig_width = (benchmark_width_ + 15) & ~15;
  int orig_height = (benchmark_height_ + 15) & ~15;
  int orig_plane_size = orig_width * orig_height;
  int u_plane_size = benchmark_width_ * benchmark_height_;
  int v_plane_size = u_plane_size;
  align_buffer_page_end(orig_uv, orig_plane_size);
  align_buffer_page_end(detiled_uv, orig_plane_size);
  align_buffer_page_end(dst_u_two_stage, u_plane_size);
  align_buffer_page_end(dst_u_opt, u_plane_size);
  align_buffer_page_end(dst_v_two_stage, v_plane_size);
  align_buffer_page_end(dst_v_opt, v_plane_size);

  MemRandomize(orig_uv, orig_plane_size);
  // Zero all destinations so bytes past the written region compare equal.
  memset(detiled_uv, 0, orig_plane_size);
  memset(dst_u_two_stage, 0, u_plane_size);
  memset(dst_u_opt, 0, u_plane_size);
  memset(dst_v_two_stage, 0, v_plane_size);
  memset(dst_v_opt, 0, v_plane_size);

  // One-shot detile + split under test.
  for (j = 0; j < benchmark_iterations_; j++) {
    DetileSplitUVPlane(
        orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
        (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
  }

  // Two-stage reference: detile into a linear buffer, then split it.
  DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
              benchmark_width_, benchmark_height_, 16);
  // Bug fix: detiled_uv was written above with a row stride of
  // benchmark_width_, so it must be read back with that same stride.
  // The previous code passed orig_width (the 16-aligned tile width),
  // which misreads rows whenever benchmark_width_ % 16 != 0.
  SplitUVPlane(detiled_uv, benchmark_width_, dst_u_two_stage,
               (benchmark_width_ + 1) / 2, dst_v_two_stage,
               (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_);

  for (i = 0; i < u_plane_size; ++i) {
    EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
  }
  for (i = 0; i < v_plane_size; ++i) {
    EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(orig_uv);
  free_aligned_buffer_page_end(detiled_uv);
  free_aligned_buffer_page_end(dst_u_two_stage);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_two_stage);
  free_aligned_buffer_page_end(dst_v_opt);
}
static int TestMultiply(int width, static int TestMultiply(int width,
int height, int height,
int benchmark_iterations, int benchmark_iterations,