mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 09:47:45 +08:00
H010Copy function to copy 16 bit planar formats
Bug: libyuv:751 Test: LibYUVConvertTest.H010ToH010_Opt Change-Id: I996d309040a14193a97d05b62ac0b3e1ad1ee74b Reviewed-on: https://chromium-review.googlesource.com/823445 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Cheng Wang <wangcheng@google.com> Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
3b81288ece
commit
5336217f11
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1682
|
||||
Version: 1683
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -77,6 +77,42 @@ int I420Copy(const uint8* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I010 to I010
|
||||
#define I010ToI010 I010Copy
|
||||
#define H010ToH010 I010Copy
|
||||
LIBYUV_API
|
||||
int I010Copy(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint16* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert 10 bit YUV to 8 bit
|
||||
LIBYUV_API
|
||||
int I010ToI420(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I400 (grey) to I420.
|
||||
LIBYUV_API
|
||||
int I400ToI420(const uint8* src_y,
|
||||
|
||||
@ -39,6 +39,15 @@ void CopyPlane_16(const uint16* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert a plane of 16 bit samples to 8 bit samples.
// Each output sample is the input sample multiplied by scale and shifted
// right by 16; use scale = 16384 for 10 bit input.
// src_stride_y is applied to the uint16 source pointer and dst_stride_y to
// the uint8 destination pointer.
// A negative height writes the destination rows in reverse order.
LIBYUV_API
void Convert16To8Plane(const uint16* src_y, int src_stride_y,
                       uint8* dst_y, int dst_stride_y,
                       int scale,  // 16384 for 10 bits
                       int width,
                       int height);
|
||||
|
||||
// Set a plane of data to a 32 bit value.
|
||||
LIBYUV_API
|
||||
void SetPlane(uint8* dst_y,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1682
|
||||
#define LIBYUV_VERSION 1683
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -106,6 +106,92 @@ int I420Copy(const uint8* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Copy I010 with optional flipping
|
||||
LIBYUV_API
|
||||
int I010Copy(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint16* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
halfheight = (height + 1) >> 1;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (halfheight - 1) * src_stride_u;
|
||||
src_v = src_v + (halfheight - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
if (dst_y) {
|
||||
CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
}
|
||||
// Copy UV planes.
|
||||
CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
|
||||
CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert 10 bit YUV to 8 bit
|
||||
LIBYUV_API
|
||||
int I010ToI420(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
halfheight = (height + 1) >> 1;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (halfheight - 1) * src_stride_u;
|
||||
src_v = src_v + (halfheight - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
// Convert Y plane.
|
||||
Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width,
|
||||
height);
|
||||
// Convert UV planes.
|
||||
Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth,
|
||||
halfheight);
|
||||
Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 422 chroma is 1/2 width, 1x height
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
LIBYUV_API
|
||||
|
||||
@ -1240,8 +1240,8 @@ int ConvertFromI420(const uint8* y,
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
dst_sample_stride ? dst_sample_stride : width * 2, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
@ -1255,8 +1255,8 @@ int ConvertFromI420(const uint8* y,
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 3,
|
||||
width, height);
|
||||
dst_sample_stride ? dst_sample_stride : width * 3, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
|
||||
@ -50,6 +50,7 @@ void CopyPlane(const uint8* src_y,
|
||||
if (src_y == dst_y && src_stride_y == dst_stride_y) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
|
||||
@ -120,6 +121,56 @@ void CopyPlane_16(const uint16* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// Convert a plane of 16 bit data to 8 bit
|
||||
LIBYUV_API
|
||||
void Convert16To8Plane(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int scale, // 16384 for 10 bits
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
|
||||
int width) = Convert16To8Row_C;
|
||||
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||
dst_stride_y = -dst_stride_y;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width && dst_stride_y == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = dst_stride_y = 0;
|
||||
}
|
||||
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
Convert16To8Row = Convert16To8Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_CONVERT16TO8ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
Convert16To8Row = Convert16To8Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Copy plane
|
||||
for (y = 0; y < height; ++y) {
|
||||
Convert16To8Row(src_y, dst_y, scale, width);
|
||||
src_y += src_stride_y;
|
||||
dst_y += dst_stride_y;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy I422.
|
||||
LIBYUV_API
|
||||
int I422Copy(const uint8* src_y,
|
||||
|
||||
@ -1856,7 +1856,7 @@ void Convert16To8Row_C(const uint16* src_y,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
dst_y[x] = (src_y[x] * scale) >> 16;
|
||||
dst_y[x] = clamp255((src_y[x] * scale) >> 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -2956,20 +2956,20 @@ void Convert16To8Row_SSSE3(const uint16* src_y,
|
||||
int width) {
|
||||
// clang-format off
|
||||
asm volatile (
|
||||
"movd %3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm3,%%xmm3 \n"
|
||||
"pshufd $0x0,%%xmm3,%%xmm3 \n"
|
||||
"movd %3,%%xmm2 \n"
|
||||
"punpcklwd %%xmm2,%%xmm2 \n"
|
||||
"pshufd $0x0,%%xmm2,%%xmm2 \n"
|
||||
|
||||
// 32 pixels per loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu 0x10(%0),%%xmm1 \n"
|
||||
"pmulhuw %%xmm3,%%xmm0 \n"
|
||||
"pmulhuw %%xmm3,%%xmm1 \n"
|
||||
"add $0x20,%0 \n"
|
||||
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"add $0x20,%0 \n"
|
||||
"add $0x10,%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
@ -2977,7 +2977,7 @@ void Convert16To8Row_SSSE3(const uint16* src_y,
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"(scale) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm3");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
@ -2988,22 +2988,21 @@ void Convert16To8Row_AVX2(const uint16* src_y,
|
||||
int width) {
|
||||
// clang-format off
|
||||
asm volatile (
|
||||
"vmovd %3,%%xmm3 \n"
|
||||
"vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
|
||||
"vbroadcastss %%xmm3,%%ymm3 \n"
|
||||
"vmovd %3,%%xmm2 \n"
|
||||
"vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n"
|
||||
"vbroadcastss %%xmm2,%%ymm2 \n"
|
||||
|
||||
// 32 pixels per loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||
"vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n"
|
||||
"vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
|
||||
|
||||
"add $0x40,%0 \n"
|
||||
"vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"add $0x40,%0 \n"
|
||||
"add $0x20,%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
@ -3012,7 +3011,7 @@ void Convert16To8Row_AVX2(const uint16* src_y,
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"(scale) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm3");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
// clang-format on
|
||||
}
|
||||
#endif // HAS_MULTIPLYROW_16_AVX2
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
@ -35,98 +36,71 @@ namespace libyuv {
|
||||
|
||||
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
|
||||
|
||||
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
// Planar test
|
||||
|
||||
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
|
||||
assert(SRC_BPC == 1 || SRC_BPC == 2); \
|
||||
assert(DST_BPC == 1 || DST_BPC == 2); \
|
||||
assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2); \
|
||||
assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2); \
|
||||
assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2); \
|
||||
assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2); \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
||||
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
|
||||
OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
for (int i = 0; i < kHeight; ++i) \
|
||||
for (int j = 0; j < kWidth; ++j) \
|
||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
|
||||
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
|
||||
(fastrand() & 0xff); \
|
||||
} \
|
||||
} \
|
||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
||||
memset(dst_u_c, 2, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_c, 3, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
||||
memset(dst_u_opt, 102, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
memset(dst_v_opt, 103, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
|
||||
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
||||
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
||||
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
|
||||
align_buffer_page_end(src_u, \
|
||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
||||
align_buffer_page_end(src_v, \
|
||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
|
||||
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
||||
memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
|
||||
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
|
||||
reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
|
||||
reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
|
||||
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
|
||||
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
|
||||
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
|
||||
reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
|
||||
reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
|
||||
reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
|
||||
reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
|
||||
NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kHeight; ++i) { \
|
||||
for (int j = 0; j < kWidth; ++j) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
|
||||
static_cast<int>(dst_y_opt[i * kWidth + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
|
||||
EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
|
||||
} \
|
||||
EXPECT_EQ(0, max_diff); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
|
||||
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
|
||||
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
|
||||
static_cast<int>( \
|
||||
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_u_c); \
|
||||
free_aligned_buffer_page_end(dst_v_c); \
|
||||
@ -138,25 +112,32 @@ namespace libyuv {
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
}
|
||||
|
||||
#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
|
||||
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
|
||||
#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_ - 4, _Any, +, 0) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Unaligned, +, 1) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Invert, -, 0) \
|
||||
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
|
||||
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
|
||||
benchmark_width_, _Opt, +, 0)
|
||||
|
||||
TESTPLANARTOP(I420, 2, 2, I420, 2, 2)
|
||||
TESTPLANARTOP(I422, 2, 1, I420, 2, 2)
|
||||
TESTPLANARTOP(I444, 1, 1, I420, 2, 2)
|
||||
TESTPLANARTOP(I420, 2, 2, I422, 2, 1)
|
||||
TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
|
||||
TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2)
|
||||
TESTPLANARTOP(I422, 2, 1, I422, 2, 1)
|
||||
TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
|
||||
TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2)
|
||||
TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2)
|
||||
TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2)
|
||||
TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1)
|
||||
TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1)
|
||||
TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2)
|
||||
TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1)
|
||||
TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1)
|
||||
TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2)
|
||||
TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2)
|
||||
|
||||
// Test Android 420 to I420
|
||||
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
|
||||
|
||||
@ -2699,6 +2699,37 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
||||
}
|
||||
#endif // HAS_MULTIPLYROW_16_AVX2
|
||||
|
||||
// Runs Convert16To8Plane once with disable_cpu_flags_ applied and then
// benchmark_iterations_ times with benchmark_cpu_info_ applied, and expects
// both outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPixelCount = kWidth * kHeight;
  const int kScale = 16384;  // Scale for 10 bit input.
  align_buffer_page_end(src16_bytes, kPixelCount * 2);
  align_buffer_page_end(dst8_opt, kPixelCount);
  align_buffer_page_end(dst8_c, kPixelCount);
  const uint16* src16 = reinterpret_cast<const uint16*>(src16_bytes);

  // Random source; distinct fill values so an untouched output is detected.
  MemRandomize(src16_bytes, kPixelCount * 2);
  memset(dst8_opt, 0, kPixelCount);
  memset(dst8_c, 1, kPixelCount);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src16, kWidth, dst8_c, kWidth, kScale, kWidth, kHeight);

  // Pass under test.
  MaskCpuFlags(benchmark_cpu_info_);
  int remaining = benchmark_iterations_;
  while (remaining-- > 0) {
    Convert16To8Plane(src16, kWidth, dst8_opt, kWidth, kScale, kWidth, kHeight);
  }

  for (int idx = 0; idx < kPixelCount; ++idx) {
    EXPECT_EQ(dst8_opt[idx], dst8_c[idx]);
  }

  free_aligned_buffer_page_end(src16_bytes);
  free_aligned_buffer_page_end(dst8_opt);
  free_aligned_buffer_page_end(dst8_c);
}
|
||||
|
||||
// TODO(fbarchard): Improve test for more platforms.
|
||||
#ifdef HAS_CONVERT16TO8ROW_AVX2
|
||||
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
|
||||
|
||||
@ -19,10 +19,6 @@
|
||||
#endif
|
||||
#include "libyuv/cpu_id.h"
|
||||
|
||||
// Change this to 1000 for benchmarking.
|
||||
// TODO(fbarchard): Add command line parsing to pass this as option.
|
||||
#define BENCHMARK_ITERATIONS 1
|
||||
|
||||
unsigned int fastrand_seed = 0xfb;
|
||||
|
||||
#ifdef LIBYUV_USE_GFLAGS
|
||||
@ -47,7 +43,7 @@ static const int32 FLAGS_libyuv_cpu_info = 0;
|
||||
// Set flags to -1 for benchmarking to avoid slower C code.
|
||||
|
||||
LibYUVConvertTest::LibYUVConvertTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -92,12 +88,6 @@ LibYUVConvertTest::LibYUVConvertTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -107,7 +97,7 @@ LibYUVConvertTest::LibYUVConvertTest()
|
||||
}
|
||||
|
||||
LibYUVColorTest::LibYUVColorTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -152,12 +142,6 @@ LibYUVColorTest::LibYUVColorTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -167,7 +151,7 @@ LibYUVColorTest::LibYUVColorTest()
|
||||
}
|
||||
|
||||
LibYUVScaleTest::LibYUVScaleTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -212,12 +196,6 @@ LibYUVScaleTest::LibYUVScaleTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -227,7 +205,7 @@ LibYUVScaleTest::LibYUVScaleTest()
|
||||
}
|
||||
|
||||
LibYUVRotateTest::LibYUVRotateTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -272,12 +250,6 @@ LibYUVRotateTest::LibYUVRotateTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -287,7 +259,7 @@ LibYUVRotateTest::LibYUVRotateTest()
|
||||
}
|
||||
|
||||
LibYUVPlanarTest::LibYUVPlanarTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -332,12 +304,6 @@ LibYUVPlanarTest::LibYUVPlanarTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -347,7 +313,7 @@ LibYUVPlanarTest::LibYUVPlanarTest()
|
||||
}
|
||||
|
||||
LibYUVBaseTest::LibYUVBaseTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -392,12 +358,6 @@ LibYUVBaseTest::LibYUVBaseTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
@ -407,7 +367,7 @@ LibYUVBaseTest::LibYUVBaseTest()
|
||||
}
|
||||
|
||||
LibYUVCompareTest::LibYUVCompareTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
: benchmark_iterations_(1),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
@ -452,12 +412,6 @@ LibYUVCompareTest::LibYUVCompareTest()
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
libyuv::MaskCpuFlags(benchmark_cpu_info_);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user