mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
H010ToAR30 for 10 bit bt.709 YUV to 30 bit RGB
This version of H010ToAR30 provides a 3-step conversion: Convert16To8Row_AVX2, H420ToARGB_AVX2, ARGBToAR30_AVX2. A low-level function is added to convert 16 bit to 8 bit, using a multiply to adjust for 10 bit or other bit depths and then saving the upper 16 bits.
Bug: libyuv:751
Test: LibYUVPlanarTest.Convert16To8Row_Opt unittest added
Change-Id: I9cc576fda8afa1003cb961d03e0e656e0b478f03
Reviewed-on: https://chromium-review.googlesource.com/783554
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
parent
a98d6cdb17
commit
26173eb73e
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1679
|
||||
Version: 1680
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -321,6 +321,19 @@ int H422ToABGR(const uint8* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to AR30.
// H010 is planar YUV held in 16 bit samples with 10 significant bits in the
// low bits; chroma planes are half width and advance every second row.
// Source strides are added to uint16 pointers, so they count 16 bit
// elements; dst_stride_ar30 is added to a uint8 pointer and counts bytes.
// The conversion uses the H709 YUV constants.
// A negative height writes the image bottom-up (inverted).
// Returns 0 on success, or -1 if any pointer is null, width <= 0 or
// height == 0.
|
||||
LIBYUV_API
|
||||
int H010ToAR30(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8* src_frame,
|
||||
|
||||
@ -278,6 +278,7 @@ extern "C" {
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
|
||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#define HAS_ARGBTOAR30ROW_AVX2
|
||||
#define HAS_CONVERT16TO8ROW_AVX2
|
||||
#define HAS_MERGEUVROW_16_AVX2
|
||||
#define HAS_MULTIPLYROW_16_AVX2
|
||||
#endif
|
||||
@ -1540,6 +1541,12 @@ void MultiplyRow_16_AVX2(const uint16* src_y,
|
||||
int width);
|
||||
void MultiplyRow_16_C(const uint16* src_y, uint16* dst_y, int scale, int width);
|
||||
|
||||
// Narrow a row of 16 bit samples to 8 bits: each output is the upper 16 bits
// of src * scale. scale depends on the source bit depth (16384 for 10 bits).
// The AVX2 version consumes 32 pixels per loop iteration; the C version
// handles any width.
void Convert16To8Row_AVX2(const uint16* src_y,
|
||||
uint8* dst_y,
|
||||
int scale,
|
||||
int width);
|
||||
void Convert16To8Row_C(const uint16* src_y, uint8* dst_y, int scale, int width);
|
||||
|
||||
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
|
||||
void CopyRow_AVX(const uint8* src, uint8* dst, int count);
|
||||
void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
|
||||
@ -2419,9 +2426,7 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb,
|
||||
void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
int width);
|
||||
void ARGBToAR30Row_Any_AVX2(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
int width);
|
||||
void ARGBToAR30Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
|
||||
void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1679
|
||||
#define LIBYUV_VERSION 1680
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -93,6 +93,7 @@ enum FourCC {
|
||||
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
|
||||
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
|
||||
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
|
||||
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb
|
||||
|
||||
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
|
||||
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
|
||||
@ -154,6 +155,7 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_J420 = 12,
|
||||
FOURCC_BPP_J400 = 8,
|
||||
FOURCC_BPP_H420 = 12,
|
||||
FOURCC_BPP_H010 = 24,
|
||||
FOURCC_BPP_MJPG = 0, // 0 means unknown.
|
||||
FOURCC_BPP_H264 = 0,
|
||||
FOURCC_BPP_IYUV = 12,
|
||||
|
||||
@ -428,6 +428,136 @@ int H422ToABGR(const uint8* src_y,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert 10 bit YUV to 10 bit RGB with matrix
|
||||
static int H010ToAR30Matrix(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int scale, // 16384 for 10 bits
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
|
||||
int width) = Convert16To8Row_C;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToARGBRow_C;
|
||||
void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToAR30Row_C;
|
||||
|
||||
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
|
||||
dst_stride_ar30 = -dst_stride_ar30;
|
||||
}
|
||||
|
||||
#if defined(HAS_CONVERT16TO8ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
Convert16To8Row = Convert16To8Row_C; // TODO(fbarchard): Any AVX2
|
||||
if (IS_ALIGNED(width, 64)) {
|
||||
Convert16To8Row = Convert16To8Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOAR30ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
align_buffer_64(row_y, width);
|
||||
align_buffer_64(row_u, halfwidth);
|
||||
align_buffer_64(row_v, halfwidth);
|
||||
align_buffer_64(row_argb, width * 4);
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
Convert16To8Row(src_y, row_y, scale, width);
|
||||
Convert16To8Row(src_u, row_u, scale, halfwidth);
|
||||
Convert16To8Row(src_v, row_v, scale, halfwidth);
|
||||
|
||||
I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width);
|
||||
|
||||
ARGBToAR30Row(row_argb, dst_ar30, width);
|
||||
|
||||
dst_ar30 += dst_stride_ar30;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
free_aligned_buffer_64(row_y);
|
||||
free_aligned_buffer_64(row_u);
|
||||
free_aligned_buffer_64(row_v);
|
||||
free_aligned_buffer_64(row_argb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert H010 to AR30.
|
||||
LIBYUV_API
|
||||
int H010ToAR30(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height) {
|
||||
return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_ar30, dst_stride_ar30,
|
||||
&kYuvH709Constants, 16384, width, height);
|
||||
}
|
||||
|
||||
// Convert I444 to ARGB with matrix
|
||||
static int I444ToARGBMatrix(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
|
||||
@ -1811,6 +1811,11 @@ void MergeRGBRow_C(const uint8* src_r,
|
||||
}
|
||||
}
|
||||
|
||||
// Use scale to convert lsb formats to msb, depending how many bits there are:
|
||||
// 128 = 9 bits
|
||||
// 64 = 10 bits
|
||||
// 16 = 12 bits
|
||||
// 1 = 16 bits
|
||||
void MergeUVRow_16_C(const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint16* dst_uv,
|
||||
@ -1840,6 +1845,21 @@ void MultiplyRow_16_C(const uint16* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// Use scale to convert lsb formats to msb, depending how many bits there are:
|
||||
// 32768 = 9 bits
|
||||
// 16384 = 10 bits
|
||||
// 4096 = 12 bits
|
||||
// 256 = 16 bits
|
||||
void Convert16To8Row_C(const uint16* src_y,
|
||||
uint8* dst_y,
|
||||
int scale,
|
||||
int width) {
|
||||
int x;
|
||||
for (x = 0; x < width; ++x) {
|
||||
dst_y[x] = (src_y[x] * scale) >> 16;
|
||||
}
|
||||
}
|
||||
|
||||
void CopyRow_C(const uint8* src, uint8* dst, int count) {
|
||||
memcpy(dst, src, count);
|
||||
}
|
||||
|
||||
@ -702,52 +702,51 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) {
|
||||
|
||||
#ifdef HAS_ARGBTOAR30ROW_AVX2
|
||||
void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0x000000ff mask
|
||||
"vpsrld $0x18,%%ymm4,%%ymm4 \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0xc0000000 mask
|
||||
"vpslld $30,%%ymm5,%%ymm5 \n"
|
||||
asm volatile(
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // 0x000000ff mask
|
||||
"vpsrld $0x18,%%ymm4,%%ymm4 \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // 0xc0000000 mask
|
||||
"vpslld $30,%%ymm5,%%ymm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
// alpha
|
||||
"vpand %%ymm5,%%ymm0,%%ymm3 \n"
|
||||
// red
|
||||
"vpsrld $0x10,%%ymm0,%%ymm1 \n"
|
||||
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $22,%%ymm1,%%ymm1 \n"
|
||||
"vpslld $20,%%ymm2,%%ymm2 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
//green
|
||||
"vpsrld $0x08,%%ymm0,%%ymm1 \n"
|
||||
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $12,%%ymm1,%%ymm1 \n"
|
||||
"vpslld $10,%%ymm2,%%ymm2 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
//blue
|
||||
"vpand %%ymm4,%%ymm0,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $2,%%ymm1,%%ymm1 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
// alpha
|
||||
"vpand %%ymm5,%%ymm0,%%ymm3 \n"
|
||||
// red
|
||||
"vpsrld $0x10,%%ymm0,%%ymm1 \n"
|
||||
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $22,%%ymm1,%%ymm1 \n"
|
||||
"vpslld $20,%%ymm2,%%ymm2 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
// green
|
||||
"vpsrld $0x08,%%ymm0,%%ymm1 \n"
|
||||
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $12,%%ymm1,%%ymm1 \n"
|
||||
"vpslld $10,%%ymm2,%%ymm2 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
// blue
|
||||
"vpand %%ymm4,%%ymm0,%%ymm1 \n"
|
||||
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
|
||||
"vpslld $2,%%ymm1,%%ymm1 \n"
|
||||
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
|
||||
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
|
||||
|
||||
"vmovdqu %%ymm3,(%1) \n"
|
||||
"add $0x20,%0 \n"
|
||||
"add $0x20,%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
);
|
||||
"vmovdqu %%ymm3,(%1) \n"
|
||||
"add $0x20,%0 \n"
|
||||
"add $0x20,%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
"vzeroupper \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
::"memory",
|
||||
"cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2851,6 +2850,11 @@ void MergeUVRow_16_AVX2(const uint16* src_u,
|
||||
}
|
||||
#endif // HAS_MERGEUVROW_AVX2
|
||||
|
||||
// Use scale to convert lsb formats to msb, depending how many bits there are:
|
||||
// 128 = 9 bits
|
||||
// 64 = 10 bits
|
||||
// 16 = 12 bits
|
||||
// 1 = 16 bits
|
||||
#ifdef HAS_MULTIPLYROW_16_AVX2
|
||||
void MultiplyRow_16_AVX2(const uint16* src_y,
|
||||
uint16* dst_y,
|
||||
@ -2885,6 +2889,47 @@ void MultiplyRow_16_AVX2(const uint16* src_y,
|
||||
}
|
||||
#endif // HAS_MULTIPLYROW_16_AVX2
|
||||
|
||||
// Convert a row of 16 bit samples to 8 bits by keeping the upper 16 bits of
// src * scale and packing the result to bytes.
// Use scale to convert lsb formats to msb, depending how many bits there are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// Only the low 16 bits of scale are used. The loop handles 32 pixels per
// iteration and runs while the remaining count is positive, so width is
// expected to be a multiple of 32.
// Guarded by its own feature macro (defined next to HAS_MULTIPLYROW_16_AVX2
// in row.h) so the definition matches what callers and tests check.
#ifdef HAS_CONVERT16TO8ROW_AVX2
void Convert16To8Row_AVX2(const uint16* src_y,
                          uint8* dst_y,
                          int scale,
                          int width) {
  // clang-format off
  asm volatile (
    // Broadcast the low 16 bits of scale to every word of ymm3.
    "vmovd %3,%%xmm3 \n"
    "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n"
    "vbroadcastss %%xmm3,%%ymm3 \n"

    // 32 pixels per loop.
    LABELALIGN
    "1: \n"
    "vmovdqu (%0),%%ymm0 \n"
    "vmovdqu 0x20(%0),%%ymm1 \n"
    // Unsigned multiply, keep the high 16 bits of each product.
    "vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n"
    "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"

    // Pack words to bytes with unsigned saturation. The pack works per
    // 128 bit lane, so the quadwords come out as 0,2,1,3 ("mutates");
    // vpermq 0xd8 puts them back in pixel order.
    "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates
    "vpermq $0xd8,%%ymm0,%%ymm0 \n"
    "vmovdqu %%ymm0,(%1) \n"
    "add $0x40,%0 \n"
    "add $0x20,%1 \n"
    "sub $0x20,%2 \n"
    "jg 1b \n"
    "vzeroupper \n"
    : "+r"(src_y),   // %0
      "+r"(dst_y),   // %1
      "+r"(width)    // %2
    : "r"(scale)     // %3
    : "memory", "cc", "xmm0", "xmm1", "xmm3");
  // clang-format on
}
#endif  // HAS_CONVERT16TO8ROW_AVX2
|
||||
|
||||
#ifdef HAS_SPLITRGBROW_SSSE3
|
||||
|
||||
// Shuffle table for converting RGB to Planar.
|
||||
|
||||
@ -1963,4 +1963,67 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
|
||||
}
|
||||
#endif // HAS_ARGBTOAR30ROW_AVX2
|
||||
|
||||
// Alias to copy pixels as is
|
||||
#define AR30ToAR30 ARGBToARGB
|
||||
|
||||
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
|
||||
ALIGN, YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, \
|
||||
BPP_C) \
|
||||
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||
const int kBpc = 2; \
|
||||
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
|
||||
align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
|
||||
align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
|
||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||
reinterpret_cast<uint16*>(src_y)[i + OFF] = (fastrand() & 0x3ff); \
|
||||
} \
|
||||
for (int i = 0; i < kSizeUV; ++i) { \
|
||||
reinterpret_cast<uint16*>(src_u)[i + OFF] = (fastrand() & 0x3ff); \
|
||||
reinterpret_cast<uint16*>(src_v)[i + OFF] = (fastrand() & 0x3ff); \
|
||||
} \
|
||||
memset(dst_argb_c + OFF, 1, kStrideB * kHeight); \
|
||||
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16*>(src_y) + OFF, kWidth, \
|
||||
reinterpret_cast<uint16*>(src_u) + OFF, kStrideUV, \
|
||||
reinterpret_cast<uint16*>(src_v) + OFF, kStrideUV, \
|
||||
dst_argb_c + OFF, kStrideB, kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16*>(src_y) + OFF, kWidth, \
|
||||
reinterpret_cast<uint16*>(src_u) + OFF, kStrideUV, \
|
||||
reinterpret_cast<uint16*>(src_v) + OFF, kStrideUV, \
|
||||
dst_argb_opt + OFF, kStrideB, kWidth, \
|
||||
NEG kHeight); \
|
||||
} \
|
||||
int max_diff = 0; \
|
||||
for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
|
||||
int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
|
||||
static_cast<int>(dst_argb_opt[i])); \
|
||||
if (abs_diff > max_diff) { \
|
||||
max_diff = abs_diff; \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, DIFF); \
|
||||
free_aligned_buffer_page_end(src_y); \
|
||||
free_aligned_buffer_page_end(src_u); \
|
||||
free_aligned_buffer_page_end(src_v); \
|
||||
free_aligned_buffer_page_end(dst_argb_c); \
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
}
|
||||
|
||||
#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, DIFF, FMT_C, BPP_C) \
|
||||
TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
|
||||
|
||||
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2, AR30, 4)
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
@ -2661,7 +2661,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO(fbarchard): improve test for platforms and cpu detect
|
||||
// TODO(fbarchard): Improve test for more platforms.
|
||||
#ifdef HAS_MULTIPLYROW_16_AVX2
|
||||
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
||||
const int kPixels = benchmark_width_ * benchmark_height_;
|
||||
@ -2697,7 +2697,48 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
|
||||
free_aligned_buffer_page_end(dst_pixels_y_opt);
|
||||
free_aligned_buffer_page_end(dst_pixels_y_c);
|
||||
}
|
||||
#endif
|
||||
#endif // HAS_MULTIPLYROW_16_AVX2
|
||||
|
||||
// TODO(fbarchard): Improve test for more platforms.
// Checks that Convert16To8Row_AVX2 matches Convert16To8Row_C for 10 bit
// input with the 10 bit scale (16384). Falls back to timing the C row when
// AVX2 is not available at run time.
#ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // The AVX2 row consumes 32 pixels per iteration, so round the pixel count
  // up to a multiple of 32 to keep it inside the page-end aligned buffers.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Limit the source range to 10 bits, the range the 16384 scale is meant
  // for.
  for (int i = 0; i < kPixels; ++i) {
    reinterpret_cast<uint16*>(src_pixels_y)[i] &= 1023;
  }

  // Different fill values so a row that writes nothing is detected.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y),
                    dst_pixels_y_c, 16384, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      Convert16To8Row_AVX2(reinterpret_cast<const uint16*>(src_pixels_y),
                           dst_pixels_y_opt, 16384, kPixels);
    } else {
      Convert16To8Row_C(reinterpret_cast<const uint16*>(src_pixels_y),
                        dst_pixels_y_opt, 16384, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
#endif  // HAS_CONVERT16TO8ROW_AVX2
|
||||
|
||||
float TestScaleMaxSamples(int benchmark_width,
|
||||
int benchmark_height,
|
||||
|
||||
@ -80,6 +80,8 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user