Add HalfMergeUVPlane function and optimize I444ToNV12 and I444ToNV21

Bug: libyuv:858
Change-Id: Ie1f03a9acaff02ee8059cf1e5c2c2e5afcde8592
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2154608
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Frank Barchard 2020-04-17 11:08:04 -07:00 committed by Commit Bot
parent d4c3f45eb6
commit 2f48ffd42b
10 changed files with 336 additions and 84 deletions


@ -105,6 +105,19 @@ void MergeUVPlane(const uint8_t* src_u,
int width,
int height);
// Scale U and V to half width and height and merge into interleaved UV plane.
// width and height are source size, allowing odd sizes.
// Use for converting I444 or I422 to NV12.
LIBYUV_API
void HalfMergeUVPlane(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height);
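// Example (a sketch, not part of this change): halving I444 chroma directly
// into an NV12 UV plane, assuming contiguous planes with stride == width:
//   int half_width = (width + 1) / 2;
//   HalfMergeUVPlane(src_u, width, src_v, width,
//                    dst_uv, half_width * 2,  // 2 bytes per UV pixel
//                    width, height);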
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,


@ -273,6 +273,7 @@ extern "C" {
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
// I210 is for H010. 2 = 422. I for 601 vs H for 709.
#define HAS_I210TOAR30ROW_SSSE3
#define HAS_I210TOARGBROW_SSSE3
@ -343,7 +344,6 @@ extern "C" {
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_RGBATOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_AYUVTOUVROW_NEON
#define HAS_AYUVTOVUROW_NEON
@ -353,6 +353,7 @@ extern "C" {
#define HAS_BYTETOFLOATROW_NEON
#define HAS_COPYROW_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I422ALPHATOARGBROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
@ -375,19 +376,20 @@ extern "C" {
#define HAS_NV21TORGB24ROW_NEON
#define HAS_NV21TOYUV24ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGBAROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTORGBAROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RAWTOYJROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB24TOYJROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYJROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITRGBROW_NEON
@ -1712,6 +1714,27 @@ void MergeUVRow_Any_MMI(const uint8_t* y_buf,
uint8_t* dst_ptr,
int width);
void HalfMergeUVRow_C(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width);
void HalfMergeUVRow_NEON(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width);
void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width);
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,


@ -426,7 +426,41 @@ int I444ToI420(const uint8_t* src_y,
dst_v, dst_stride_v, width, height, width, height);
}
// TODO(fbarchard): Implement row conversion.
LIBYUV_API
int I444ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 ||
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
HalfMergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv,
dst_stride_uv, width, height);
return 0;
}
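// Usage sketch (illustrative, assumes tightly packed planes; odd width and
// height work because the UV filter reads source-sized planes):
//   I444ToNV12(src_y, width, src_u, width, src_v, width,
//              dst_y, width, dst_uv, ((width + 1) / 2) * 2,
//              width, height);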
LIBYUV_API
int I444ToNV21(const uint8_t* src_y,
int src_stride_y,
@ -440,30 +474,9 @@ int I444ToNV21(const uint8_t* src_y,
int dst_stride_vu,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
// Allocate u and v buffers
align_buffer_64(plane_u, halfwidth * halfheight * 2);
uint8_t* plane_v = plane_u + halfwidth * halfheight;
I444ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width,
height);
MergeUVPlane(plane_v, halfwidth, plane_u, halfwidth, dst_vu, dst_stride_vu,
halfwidth, halfheight);
free_aligned_buffer_64(plane_u);
return 0;
return I444ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u,
src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu,
width, height);
}
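// Note: NV21 differs from NV12 only in chroma byte order (VU vs UV), so
// passing src_v where I444ToNV12 expects U (and vice versa) writes V into
// the first byte of each pair, producing the NV21 layout with no extra code.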
// I400 is greyscale typically used in MJPG
@ -498,46 +511,6 @@ int I400ToI420(const uint8_t* src_y,
return 0;
}
// TODO(fbarchard): Implement row conversion.
LIBYUV_API
int I444ToNV12(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
// Allocate u and v buffers
align_buffer_64(plane_u, halfwidth * halfheight * 2);
uint8_t* plane_v = plane_u + halfwidth * halfheight;
I444ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width,
height);
MergeUVPlane(plane_u, halfwidth, plane_v, halfwidth, dst_uv, dst_stride_uv,
halfwidth, halfheight);
free_aligned_buffer_64(plane_u);
return 0;
}
// I400 is greyscale typically used in MJPG
LIBYUV_API
int I400ToNV21(const uint8_t* src_y,


@ -488,7 +488,6 @@ int I420ToUYVY(const uint8_t* src_y,
return 0;
}
// TODO(fbarchard): test negative height for invert.
LIBYUV_API
int I420ToNV12(const uint8_t* src_y,
int src_stride_y,
@ -502,12 +501,23 @@ int I420ToNV12(const uint8_t* src_y,
int dst_stride_uv,
int width,
int height) {
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 ||
height == 0) {
return -1;
}
int halfwidth = (width + 1) / 2;
int halfheight = height > 0 ? (height + 1) / 2 : (height - 1) / 2;
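// With truncation toward zero, (height - 1) / 2 for negative height equals
// the rounded-up half of |height|, negated.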
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}


@ -4103,6 +4103,52 @@ int UYVYToNV12(const uint8_t* src_uyvy,
return 0;
}
// width and height are the source size, allowing odd sizes.
LIBYUV_API
void HalfMergeUVPlane(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y;
void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
const uint8_t* src_v, int src_stride_v,
uint8_t* dst_uv, int width) = HalfMergeUVRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
#if defined(HAS_HALFMERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
HalfMergeUVRow = HalfMergeUVRow_NEON;
}
#endif
#if defined(HAS_HALFMERGEUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
HalfMergeUVRow = HalfMergeUVRow_SSSE3;
}
#endif
for (y = 0; y < height - 1; y += 2) {
// Half-scale two rows of U and V and merge into one row of UV.
HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
src_u += src_stride_u * 2;
src_v += src_stride_v * 2;
dst_uv += dst_stride_uv;
}
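// Odd height: pass stride 0 so the last row is averaged with itself,
// reducing the 2x2 filter to a horizontal 1x2 average.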
if (height & 1) {
HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
}
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv


@ -3563,6 +3563,30 @@ void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
}
}
void HalfMergeUVRow_C(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
src_u[src_stride_u + 1] + 2) >>
2;
dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
src_v[src_stride_v + 1] + 2) >>
2;
src_u += 2;
src_v += 2;
dst_uv += 2;
}
if (width & 1) {
dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
}
}
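// Worked example (illustrative): the 2x2 block {10, 20; 30, 40} produces
// (10 + 20 + 30 + 40 + 2) >> 2 = 25; an odd trailing column {10; 30}
// produces (10 + 30 + 1) >> 1 = 20.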
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv


@ -1078,6 +1078,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
}
#endif
// clang-format off
// TODO(mraptis): Consider passing R, G, B multipliers as parameter.
// round parameter is register containing value to add before shift.
#define RGBTOY(round) \
@ -1102,10 +1104,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
"phaddw %%xmm0,%%xmm6 \n" \
"phaddw %%xmm2,%%xmm1 \n" \
"prefetcht0 1280(%0) \n" \
"paddw %%" #round \
",%%xmm6 \n" \
"paddw %%" #round \
",%%xmm1 \n" \
"paddw %%" #round ",%%xmm6 \n" \
"paddw %%" #round ",%%xmm1 \n" \
"psrlw $0x8,%%xmm6 \n" \
"psrlw $0x8,%%xmm1 \n" \
"packuswb %%xmm1,%%xmm6 \n" \
@ -1132,10 +1132,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
"prefetcht0 1280(%0) \n" \
"vpaddw %%" #round \
",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
"vpaddw %%" #round \
",%%ymm2,%%ymm2 \n" \
"vpaddw %%" #round ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
"vpaddw %%" #round ",%%ymm2,%%ymm2 \n" \
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
"vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
@ -1146,6 +1144,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
"jg 1b \n" \
"vzeroupper \n"
// clang-format on
#ifdef HAS_ARGBTOYROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@ -7005,6 +7005,53 @@ void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
}
#endif // HAS_SWAPUVROW_AVX2
void HalfMergeUVRow_SSSE3(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width) {
asm volatile(
"pcmpeqb %%xmm4,%%xmm4 \n"
"psrlw $0xf,%%xmm4 \n"
"packuswb %%xmm4,%%xmm4 \n"
"pxor %%xmm5,%%xmm5 \n"
"1: \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n" // load 16 U values
"movdqu (%1),%%xmm1 \n" // load 16 V values
"movdqu 0(%0,%4,1),%%xmm2 \n" // 16 from next row
"movdqu 0(%1,%5,1),%%xmm3 \n"
"lea 0x10(%0),%0 \n"
"pmaddubsw %%xmm4,%%xmm0 \n" // half size
"pmaddubsw %%xmm4,%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm4,%%xmm3 \n"
"lea 0x10(%1),%1 \n"
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm3,%%xmm1 \n"
"psrlw $0x1,%%xmm0 \n"
"psrlw $0x1,%%xmm1 \n"
"pavgw %%xmm5,%%xmm0 \n"
"pavgw %%xmm5,%%xmm1 \n"
"packuswb %%xmm0,%%xmm0 \n"
"packuswb %%xmm1,%%xmm1 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"movdqu %%xmm0,(%2) \n" // store 8 UV pixels
"lea 0x10(%2),%2 \n"
"sub $0x10,%3 \n" // 16 src pixels per loop
"jg 1b \n"
: "+r"(src_u), // %0
"+r"(src_v), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3
: "r"((intptr_t)(src_stride_u)), // %4
"r"((intptr_t)(src_stride_v)) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // defined(__x86_64__) || defined(__i386__)
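A scalar model of the SSSE3 kernel above, for one output U byte (a sketch;
HalfMergeModel and its parameter names are illustrative only, with u0 and u1
adjacent bytes in the top source row and u2 and u3 the bytes below them):
#include <stdint.h>
static uint8_t HalfMergeModel(uint8_t u0, uint8_t u1, uint8_t u2, uint8_t u3) {
  int sum = (u0 + u1) + (u2 + u3);   // pmaddubsw with 0x01 bytes, then paddw
  sum >>= 1;                         // psrlw $1: halve the 4-pixel sum
  return (uint8_t)((sum + 1) >> 1);  // pavgw with zero: rounded final halving
}
This evaluates to (u0 + u1 + u2 + u3 + 2) >> 2 for every input, so the SIMD
path matches HalfMergeUVRow_C bit for bit.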
#ifdef __cplusplus


@ -2984,6 +2984,39 @@ void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
: "cc", "memory", "q0", "q1", "q2");
}
void HalfMergeUVRow_NEON(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width) {
const uint8_t* src_u_1 = src_u + src_stride_u;
const uint8_t* src_v_1 = src_v + src_stride_v;
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load 16 U values
"vld1.8 {q1}, [%2]! \n" // load 16 V values
"vld1.8 {q2}, [%1]! \n"
"vld1.8 {q3}, [%3]! \n"
"vpaddl.u8 q0, q0 \n" // half size
"vpaddl.u8 q1, q1 \n"
"vpadal.u8 q0, q2 \n"
"vpadal.u8 q1, q3 \n"
"vqrshrn.u16 d0, q0, #2 \n"
"vqrshrn.u16 d1, q1, #2 \n"
"subs %5, %5, #16 \n" // 16 src pixels per loop
"vst2.8 {d0, d1}, [%4]! \n" // store 8 UV pixels
"bgt 1b \n"
: "+r"(src_u), // %0
"+r"(src_u_1), // %1
"+r"(src_v), // %2
"+r"(src_v_1), // %3
"+r"(dst_uv), // %4
"+r"(width) // %5
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#ifdef __cplusplus


@ -3188,11 +3188,12 @@ void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
"uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts.
"uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts.
"prfm pldl1keep, [%0, 448] \n"
"uqrshrn v3.8b, v0.8h, #2 \n" // 2x2 average
"uqrshrn v2.8b, v1.8h, #2 \n"
"prfm pldl1keep, [%1, 448] \n"
"subs %w3, %w3, #16 \n" // 16 processed per loop.
"st2 {v2.8b,v3.8b}, [%2], #16 \n" // store 8 pixels UV.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"b.gt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_ayuv_1), // %1
@ -3210,18 +3211,18 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
asm volatile(
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
// pixels.
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 ayuv
"uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
"uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
"uadalp v0.8h, v4.16b \n" // V 16 bytes -> 8 shorts.
"uadalp v1.8h, v5.16b \n" // U 16 bytes -> 8 shorts.
"prfm pldl1keep, [%0, 448] \n"
"uqrshrn v0.8b, v0.8h, #2 \n" // 2x2 average
"uqrshrn v1.8b, v1.8h, #2 \n"
"prfm pldl1keep, [%1, 448] \n"
"subs %w3, %w3, #16 \n" // 16 processed per loop.
"st2 {v0.8b,v1.8b}, [%2], #16 \n" // store 8 pixels VU.
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"b.gt 1b \n"
: "+r"(src_ayuv), // %0
"+r"(src_ayuv_1), // %1
@ -3265,6 +3266,41 @@ void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
: "cc", "memory", "v0", "v1", "v2");
}
void HalfMergeUVRow_NEON(const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_uv,
int width) {
const uint8_t* src_u_1 = src_u + src_stride_u;
const uint8_t* src_v_1 = src_v + src_stride_v;
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], #16 \n" // load 16 U values
"ld1 {v1.16b}, [%2], #16 \n" // load 16 V values
"ld1 {v2.16b}, [%1], #16 \n"
"ld1 {v3.16b}, [%3], #16 \n"
"uaddlp v0.8h, v0.16b \n" // half size
"uaddlp v1.8h, v1.16b \n"
"prfm pldl1keep, [%0, 448] \n" // prefetch 7 lines ahead
"uadalp v0.8h, v2.16b \n"
"uadalp v1.8h, v3.16b \n"
"prfm pldl1keep, [%2, 448] \n"
"uqrshrn v0.8b, v0.8h, #2 \n"
"uqrshrn v1.8b, v1.8h, #2 \n"
"subs %w5, %w5, #16 \n" // 16 src pixels per loop
"st2 {v0.8b, v1.8b}, [%4], #16 \n" // store 8 UV pixels
"b.gt 1b \n"
: "+r"(src_u), // %0
"+r"(src_u_1), // %1
"+r"(src_v), // %2
"+r"(src_v_1), // %3
"+r"(dst_uv), // %4
"+r"(width) // %5
:
: "cc", "memory", "v0", "v1", "v2", "v3");
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus


@ -21,6 +21,7 @@
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/scale.h"
#ifdef ENABLE_ROW_TESTS
// row.h defines SIMD_ALIGNED, overriding unit_test.h
@ -3479,4 +3480,50 @@ TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
free_aligned_buffer_page_end(orig_pixels);
}
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
// Dest is half size, rounded up to handle odd source dimensions.
int dst_width = (benchmark_width_ + 1) / 2;
int dst_height = (benchmark_height_ + 1) / 2;
align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
align_buffer_page_end(tmp_pixels_u, dst_width * dst_height);
align_buffer_page_end(tmp_pixels_v, dst_width * dst_height);
align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);
MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
MemRandomize(tmp_pixels_u, dst_width * dst_height);
MemRandomize(tmp_pixels_v, dst_width * dst_height);
MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);
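// Reference path: bilinear half-scale each plane, then interleave with
// MergeUVPlane; HalfMergeUVPlane must match this output exactly.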
ScalePlane(src_pixels_u, benchmark_width_, benchmark_width_,
benchmark_height_, tmp_pixels_u, dst_width, dst_width, dst_height,
kFilterBilinear);
ScalePlane(src_pixels_v, benchmark_width_, benchmark_width_,
benchmark_height_, tmp_pixels_v, dst_width, dst_width, dst_height,
kFilterBilinear);
MergeUVPlane(tmp_pixels_u, dst_width, tmp_pixels_v, dst_width,
dst_pixels_uv_c, dst_width * 2, dst_width, dst_height);
for (int i = 0; i < benchmark_iterations_; ++i) {
HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
benchmark_width_, benchmark_height_);
}
for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
}
free_aligned_buffer_page_end(src_pixels_u);
free_aligned_buffer_page_end(src_pixels_v);
free_aligned_buffer_page_end(tmp_pixels_u);
free_aligned_buffer_page_end(tmp_pixels_v);
free_aligned_buffer_page_end(dst_pixels_uv_opt);
free_aligned_buffer_page_end(dst_pixels_uv_c);
}
} // namespace libyuv