Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2025-12-06 16:56:55 +08:00)
Fixes for SplitUVPlane_16 and MergeUVPlane_16
Planar functions pass a bit depth instead of a scale factor. Row functions pass a shift instead of a depth. Asserts are added to the C reference code. The AVX shift instruction expects a single shift value in an XMM register. The NEON versions pass the shift as an input (not an output). The NEON split is reimplemented as a left shift on shorts by a negative amount to achieve a right shift. Planar unit tests are added.

Bug: libyuv:888
Change-Id: I8fe62d3d777effc5321c361cd595c58b7f93807e
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2782086
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
This commit is contained in:
parent d8f1bfc981
commit 312c02a5aa
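For orientation, here is a scalar model of the convention this change settles on, mirroring the C reference rows in the diff below. It is a sketch, not the library's exported API: the function names are illustrative, and the per-pixel pointer stepping is assumed because the C rows in the diff are truncated. The planar level passes a bit depth; the row level derives the shift. Split moves MSB-justified UV pairs down to depth-bit values; merge moves depth-bit U/V back up to the MSB.

#include <assert.h>
#include <stdint.h>

// Illustrative scalar model: the caller passes depth, the row derives the shift.
static void SplitUVRowModel(const uint16_t* src_uv, uint16_t* dst_u,
                            uint16_t* dst_v, int depth, int width) {
  int shift = 16 - depth;  // e.g. depth 10 -> shift 6
  assert(depth >= 8 && depth <= 16);
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[0] >> shift;  // MSB-justified -> depth-bit LSB
    dst_v[x] = src_uv[1] >> shift;
    src_uv += 2;  // stepping assumed; the C row in the diff is truncated
  }
}

static void MergeUVRowModel(const uint16_t* src_u, const uint16_t* src_v,
                            uint16_t* dst_uv, int depth, int width) {
  int shift = 16 - depth;
  assert(depth >= 8 && depth <= 16);
  for (int x = 0; x < width; ++x) {
    dst_uv[0] = src_u[x] << shift;  // depth-bit LSB -> MSB-justified
    dst_uv[1] = src_v[x] << shift;  // assumed counterpart of the U store
    dst_uv += 2;
  }
}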
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1782
+Version: 1783
 License: BSD
 License File: LICENSE
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1782
+#define LIBYUV_VERSION 1783

 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -400,7 +400,7 @@ int I210ToI010(const uint16_t* src_y,
 }

 // Any I[420]1[02] to P[420]1[02] format with mirroring.
-static int Ix1xToPx1x(const uint16_t* src_y,
+static int IxxxToPxxx(const uint16_t* src_y,
                       int src_stride_y,
                       const uint16_t* src_u,
                       int src_stride_u,
@@ -441,7 +441,7 @@ int I010ToP010(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+  return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                     src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
                     width, height, 1, 1, 10);
 }
@@ -459,7 +459,7 @@ int I210ToP210(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+  return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                     src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
                     width, height, 1, 0, 10);
 }
@@ -477,7 +477,7 @@ int I012ToP012(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+  return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                     src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
                     width, height, 1, 1, 12);
 }
@@ -495,7 +495,7 @@ int I212ToP212(const uint16_t* src_y,
                int dst_stride_uv,
                int width,
                int height) {
-  return Ix1xToPx1x(src_y, src_stride_y, src_u, src_stride_u, src_v,
+  return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v,
                     src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
                     width, height, 1, 0, 12);
 }

@@ -10,6 +10,7 @@

 #include "libyuv/planar_functions.h"

+#include <assert.h>
 #include <string.h>  // for memset()

 #include "libyuv/cpu_id.h"
@@ -563,9 +564,9 @@ void SplitUVPlane_16(const uint16_t* src_uv,
                      int height,
                      int depth) {
   int y;
-  int scale = 1 << depth;
-  void (*SplitUVRow)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v,
-                     int scale, int width) = SplitUVRow_16_C;
+  void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u,
+                        uint16_t* dst_v, int depth, int width) =
+      SplitUVRow_16_C;
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
@@ -583,24 +584,24 @@ void SplitUVPlane_16(const uint16_t* src_uv,
   }
 #if defined(HAS_SPLITUVROW_16_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
-    SplitUVRow = SplitUVRow_16_Any_AVX2;
+    SplitUVRow_16 = SplitUVRow_16_Any_AVX2;
     if (IS_ALIGNED(width, 16)) {
-      SplitUVRow = SplitUVRow_16_AVX2;
+      SplitUVRow_16 = SplitUVRow_16_AVX2;
     }
   }
 #endif
 #if defined(HAS_SPLITUVROW_16_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    SplitUVRow = SplitUVRow_16_Any_NEON;
+    SplitUVRow_16 = SplitUVRow_16_Any_NEON;
     if (IS_ALIGNED(width, 8)) {
-      SplitUVRow = SplitUVRow_16_NEON;
+      SplitUVRow_16 = SplitUVRow_16_NEON;
     }
   }
 #endif

   for (y = 0; y < height; ++y) {
     // Copy a row of UV.
-    SplitUVRow(src_uv, dst_u, dst_v, scale, width);
+    SplitUVRow_16(src_uv, dst_u, dst_v, depth, width);
     dst_u += dst_stride_u;
     dst_v += dst_stride_v;
     src_uv += src_stride_uv;
@@ -618,9 +619,11 @@ void MergeUVPlane_16(const uint16_t* src_u,
                      int height,
                      int depth) {
   int y;
-  int scale = 1 << (16 - depth);
-  void (*MergeUVRow)(const uint16_t* src_u, const uint16_t* src_v,
-                     uint16_t* dst_uv, int scale, int width) = MergeUVRow_16_C;
+  void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v,
+                        uint16_t* dst_uv, int depth, int width) =
+      MergeUVRow_16_C;
+  assert(depth >= 8);
+  assert(depth <= 16);
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
@@ -636,24 +639,24 @@ void MergeUVPlane_16(const uint16_t* src_u,
   }
 #if defined(HAS_MERGEUVROW_16_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
-    MergeUVRow = MergeUVRow_16_Any_AVX2;
+    MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
     if (IS_ALIGNED(width, 16)) {
-      MergeUVRow = MergeUVRow_16_AVX2;
+      MergeUVRow_16 = MergeUVRow_16_AVX2;
     }
   }
 #endif
 #if defined(HAS_MERGEUVROW_16_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    MergeUVRow = MergeUVRow_16_Any_NEON;
+    MergeUVRow_16 = MergeUVRow_16_Any_NEON;
     if (IS_ALIGNED(width, 8)) {
-      MergeUVRow = MergeUVRow_16_NEON;
+      MergeUVRow_16 = MergeUVRow_16_NEON;
     }
   }
 #endif

   for (y = 0; y < height; ++y) {
     // Merge a row of U and V into a row of UV.
-    MergeUVRow(src_u, src_v, dst_uv, scale, width);
+    MergeUVRow_16(src_u, src_v, dst_uv, depth, width);
     src_u += src_stride_u;
     src_v += src_stride_v;
     dst_uv += dst_stride_uv;
@@ -671,7 +674,7 @@ void ConvertToMSBPlane_16(const uint16_t* src_y,
                           int depth) {
   int y;
   int scale = 1 << (16 - depth);
-  void (*MultiplyRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
+  void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                       int width) = MultiplyRow_16_C;
   // Negative height means invert the image.
   if (height < 0) {
@@ -688,23 +691,23 @@ void ConvertToMSBPlane_16(const uint16_t* src_y,

 #if defined(HAS_MULTIPLYROW_16_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
-    MultiplyRow = MultiplyRow_16_Any_AVX2;
+    MultiplyRow_16 = MultiplyRow_16_Any_AVX2;
     if (IS_ALIGNED(width, 32)) {
-      MultiplyRow = MultiplyRow_16_AVX2;
+      MultiplyRow_16 = MultiplyRow_16_AVX2;
     }
   }
 #endif
 #if defined(HAS_MULTIPLYROW_16_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    MultiplyRow = MultiplyRow_16_Any_NEON;
+    MultiplyRow_16 = MultiplyRow_16_Any_NEON;
     if (IS_ALIGNED(width, 16)) {
-      MultiplyRow = MultiplyRow_16_NEON;
+      MultiplyRow_16 = MultiplyRow_16_NEON;
     }
   }
 #endif

   for (y = 0; y < height; ++y) {
-    MultiplyRow(src_y, dst_y, scale, width);
+    MultiplyRow_16(src_y, dst_y, scale, width);
     src_y += src_stride_y;
     dst_y += dst_stride_y;
   }

@@ -10,6 +10,7 @@

 #include "libyuv/row.h"

+#include <assert.h>
 #include <stdio.h>
 #include <string.h>  // For memcpy and memset.

@@ -3045,6 +3046,8 @@ void MergeUVRow_16_C(const uint16_t* src_u,
                      int depth,
                      int width) {
   int shift = 16 - depth;
+  assert(depth >= 8);
+  assert(depth <= 16);
   int x;
   for (x = 0; x < width; ++x) {
     dst_uv[0] = src_u[x] << shift;
@@ -3061,6 +3064,8 @@ void SplitUVRow_16_C(const uint16_t* src_uv,
                      int width) {
   int shift = 16 - depth;
   int x;
+  assert(depth >= 8);
+  assert(depth <= 16);
   for (x = 0; x < width; ++x) {
     dst_u[x] = src_uv[0] >> shift;
     dst_v[x] = src_uv[1] >> shift;
@@ -3098,6 +3103,9 @@ void Convert16To8Row_C(const uint16_t* src_y,
                        int scale,
                        int width) {
   int x;
+  assert(scale >= 256);
+  assert(scale <= 32768);
+
   for (x = 0; x < width; ++x) {
     dst_y[x] = clamp255((src_y[x] * scale) >> 16);
   }
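As a side note on the new scale asserts above: for depth-bit LSB-justified input, the scale that Convert16To8Row_C expects works out to 1 << (24 - depth), so the asserted range 256..32768 covers depths 16 down to 9. This relationship is inferred from the clamp255((v * scale) >> 16) form rather than stated by the change; a small self-check under that assumption:

#include <assert.h>

// Assumed mapping: scale = 1 << (24 - depth) makes the maximum depth-bit
// sample come out as 255 through (v * scale) >> 16.
static int ScaleForDepth(int depth) { return 1 << (24 - depth); }

int main(void) {
  for (int depth = 9; depth <= 16; ++depth) {
    int scale = ScaleForDepth(depth);
    int max_in = (1 << depth) - 1;
    assert(scale >= 256 && scale <= 32768);   // matches the new asserts
    assert(((max_in * scale) >> 16) == 255);  // full range maps to 255
  }
  return 0;
}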
@@ -4728,8 +4728,6 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
   // clang-format off
   asm volatile (
     "vmovd      %4,%%xmm3                      \n"
-    "vpunpcklwd %%xmm3,%%xmm3,%%xmm3           \n"
-    "vbroadcastss %%xmm3,%%xmm3                \n"
     "sub        %0,%1                          \n"

     // 16 pixels per loop.
@@ -4761,7 +4759,7 @@ void MergeUVRow_16_AVX2(const uint16_t* src_u,
 }
 #endif  // HAS_MERGEUVROW_AVX2

-#ifdef HAS_MERGEUVROW_16_AVX2
+#ifdef HAS_SPLITUVROW_16_AVX2
 const uvec8 kSplitUVShuffle16 = {0, 1, 4, 5, 8, 9, 12, 13,
                                  2, 3, 6, 7, 10, 11, 14, 15};
 void SplitUVRow_16_AVX2(const uint16_t* src_uv,
@@ -4773,8 +4771,6 @@ void SplitUVRow_16_AVX2(const uint16_t* src_uv,
   // clang-format off
   asm volatile (
     "vmovd      %4,%%xmm3                      \n"
-    "vpunpcklwd %%xmm3,%%xmm3,%%xmm3           \n"
-    "vbroadcastss %%xmm3,%%xmm3                \n"
     "vbroadcastf128 %5,%%ymm4                  \n"
     "sub        %1,%2                          \n"

@@ -4802,14 +4798,13 @@ void SplitUVRow_16_AVX2(const uint16_t* src_uv,
   : "+r"(src_uv),   // %0
     "+r"(dst_u),    // %1
     "+r"(dst_v),    // %2
-    "+r"(width),    // %3
-    "+r"(depth)     // %4
-  :
+    "+r"(width)     // %3
+  : "r"(depth),     // %4
+    "m"(kSplitUVShuffle16)  // %5
   : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
   // clang-format on
 }
-#endif  // HAS_MERGEUVROW_AVX2
+#endif  // HAS_SPLITUVROW_16_AVX2

 // Use scale to convert lsb formats to msb, depending how many bits there are:
 // 128 = 9 bits
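The vpunpcklwd/vbroadcastss pairs removed in the two AVX2 hunks above follow from the point in the commit message that the AVX packed-shift instructions take a single shift count from the low bits of an XMM register, so one vmovd is enough and no per-lane broadcast is needed. A minimal intrinsics sketch of that instruction form (illustrative only; the row functions themselves are the inline assembly shown in the diff):

#include <immintrin.h>

// VPSRLW/VPSLLW with a register count read the count from the low 64 bits of
// an XMM register and apply it to every 16-bit lane.
static inline __m256i ShiftRightWords(__m256i v, int shift) {
  __m128i count = _mm_cvtsi32_si128(shift);  // single shift value in xmm
  return _mm256_srl_epi16(v, count);         // all 16 words shifted by count
}

static inline __m256i ShiftLeftWords(__m256i v, int shift) {
  return _mm256_sll_epi16(v, _mm_cvtsi32_si128(shift));
}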
@@ -3270,32 +3270,22 @@ void SplitUVRow_16_NEON(const uint16_t* src_uv,
                         uint16_t* dst_v,
                         int depth,
                         int width) {
+  int shift = depth - 16;  // Negative for right shift.
   asm volatile(
-      "vdup.32    q0, %3                       \n"
+      "vdup.16    q2, %4                       \n"
       "1:                                      \n"
-      "vld2.16    {q1, q2}, [%0]!              \n"  // load 8 UV
-      "vmovl.u16  q3, d2                       \n"
-      "vmovl.u16  q4, d3                       \n"
-      "vshl.u32   q3, q3, q0                   \n"
-      "vshl.u32   q4, q4, q0                   \n"
-      "vmovn.u32  d2, q3                       \n"
-      "vmovn.u32  d3, q4                       \n"
-      "vmovl.u16  q3, d4                       \n"
-      "vmovl.u16  q4, d5                       \n"
-      "vshl.u32   q3, q3, q0                   \n"
-      "vshl.u32   q4, q4, q0                   \n"
-      "vmovn.u32  d4, q3                       \n"
-      "vmovn.u32  d5, q4                       \n"
-      "subs       %4, %4, #8                   \n"  // 8 src pixels per loop
-      "vst1.16    {q1}, [%1]!                  \n"  // store 8 U pixels
-      "vst1.16    {q2}, [%2]!                  \n"  // store 8 V pixels
+      "vld2.16    {q0, q1}, [%0]!              \n"  // load 8 UV
+      "vshl.u16   q0, q0, q2                   \n"
+      "vshl.u16   q1, q1, q2                   \n"
+      "subs       %3, %3, #8                   \n"  // 8 src pixels per loop
+      "vst1.16    {q0}, [%1]!                  \n"  // store 8 U pixels
+      "vst1.16    {q1}, [%2]!                  \n"  // store 8 V pixels
       "bgt        1b                           \n"
       : "+r"(src_uv),  // %0
         "+r"(dst_u),   // %1
         "+r"(dst_v),   // %2
-        "+r"(depth),   // %3
-        "+r"(width)    // %4
-      :
+        "+r"(width)    // %3
+      : "r"(shift)     // %4
       : "cc", "memory", "q0", "q1", "q2", "q3", "q4");
 }

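The reworked split above relies on the NEON VSHL behaviour the commit message describes: the per-lane shift amount is signed, and a negative amount shifts right. That is why shift is computed as depth - 16 (negative for depth < 16) and passed as an input operand, replacing the old widen/shift/narrow sequence. A small intrinsics sketch of the same trick (illustrative; the row function itself is the inline assembly above):

#include <arm_neon.h>

// vshlq_u16 shifts each lane left by a signed amount; a negative amount
// shifts right, so one instruction moves MSB-justified samples down to
// 'depth' bits.
static inline uint16x8_t ShiftToLsb(uint16x8_t v, int depth) {
  int16x8_t shift = vdupq_n_s16((int16_t)(depth - 16));  // e.g. depth 10 -> -6
  return vshlq_u16(v, shift);  // right shift by 6 when depth == 10
}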
@@ -3306,21 +3296,20 @@ void MergeUVRow_16_NEON(const uint16_t* src_u,
                         int width) {
   int shift = 16 - depth;
   asm volatile(
-      "vdup.16    q2, %3                       \n"
+      "vdup.16    q2, %4                       \n"
       "1:                                      \n"
       "vld1.16    {q0}, [%0]!                  \n"  // load 8 U
       "vld1.16    {q1}, [%1]!                  \n"  // load 8 V
       "vshl.u16   q0, q0, q2                   \n"
       "vshl.u16   q1, q1, q2                   \n"
-      "subs       %4, %4, #8                   \n"  // 8 src pixels per loop
+      "subs       %3, %3, #8                   \n"  // 8 src pixels per loop
       "vst2.16    {q0, q1}, [%2]!              \n"  // store 8 UV pixels
       "bgt        1b                           \n"
       : "+r"(src_u),   // %0
         "+r"(src_v),   // %1
         "+r"(dst_uv),  // %2
-        "+r"(shift),   // %3
-        "+r"(width)    // %4
-      :
+        "+r"(width)    // %3
+      : "r"(shift)     // %4
       : "cc", "memory", "q0", "q1", "q2");
 }

(One file's diff is suppressed because it is too large.)
@@ -2605,6 +2605,64 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }

+// 16 bit channel split and merge
+TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+  align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+  align_buffer_page_end(tmp_pixels_u_c, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_v_c, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_u_opt, kPixels * 2);
+  align_buffer_page_end(tmp_pixels_v_opt, kPixels * 2);
+  align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
+  align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
+  MemRandomize(src_pixels, kPixels * 2 * 2);
+  MemRandomize(tmp_pixels_u_c, kPixels * 2);
+  MemRandomize(tmp_pixels_v_c, kPixels * 2);
+  MemRandomize(tmp_pixels_u_opt, kPixels * 2);
+  MemRandomize(tmp_pixels_v_opt, kPixels * 2);
+  MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
+  MemRandomize(dst_pixels_c, kPixels * 2 * 2);
+
+  MaskCpuFlags(disable_cpu_flags_);
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)tmp_pixels_u_c, benchmark_width_,
+                  (uint16_t*)tmp_pixels_v_c, benchmark_width_, benchmark_width_,
+                  benchmark_height_, 12);
+  MergeUVPlane_16((const uint16_t*)tmp_pixels_u_c, benchmark_width_,
+                  (const uint16_t*)tmp_pixels_v_c, benchmark_width_,
+                  (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
+                  benchmark_width_, benchmark_height_, 12);
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)tmp_pixels_u_opt, benchmark_width_,
+                  (uint16_t*)tmp_pixels_v_opt, benchmark_width_,
+                  benchmark_width_, benchmark_height_, 12);
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    MergeUVPlane_16((const uint16_t*)tmp_pixels_u_opt, benchmark_width_,
+                    (const uint16_t*)tmp_pixels_v_opt, benchmark_width_,
+                    (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
+                    benchmark_width_, benchmark_height_, 12);
+  }
+
+  for (int i = 0; i < kPixels * 2; ++i) {
+    EXPECT_EQ(tmp_pixels_u_c[i], tmp_pixels_u_opt[i]);
+    EXPECT_EQ(tmp_pixels_v_c[i], tmp_pixels_v_opt[i]);
+  }
+  for (int i = 0; i < kPixels * 2 * 2; ++i) {
+    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+  }
+  free_aligned_buffer_page_end(src_pixels);
+  free_aligned_buffer_page_end(tmp_pixels_u_c);
+  free_aligned_buffer_page_end(tmp_pixels_v_c);
+  free_aligned_buffer_page_end(tmp_pixels_u_opt);
+  free_aligned_buffer_page_end(tmp_pixels_v_opt);
+  free_aligned_buffer_page_end(dst_pixels_opt);
+  free_aligned_buffer_page_end(dst_pixels_c);
+}
+
 TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
   // Round count up to multiple of 16
   const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
@@ -2649,6 +2707,46 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
   free_aligned_buffer_page_end(dst_pixels_c);
 }

+// 16 bit channel split
+TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
+  // Round count up to multiple of 16
+  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
+  align_buffer_page_end(src_pixels, kPixels * 2 * 2);
+  align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
+  align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
+  align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
+  align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
+  MemRandomize(src_pixels, kPixels * 2 * 2);
+  MemRandomize(dst_pixels_u_c, kPixels * 2);
+  MemRandomize(dst_pixels_v_c, kPixels * 2);
+  MemRandomize(dst_pixels_u_opt, kPixels * 2);
+  MemRandomize(dst_pixels_v_opt, kPixels * 2);
+
+  MaskCpuFlags(disable_cpu_flags_);
+  SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                  (uint16_t*)dst_pixels_u_c, benchmark_width_,
+                  (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
+                  benchmark_height_, 10);
+  MaskCpuFlags(benchmark_cpu_info_);
+
+  for (int i = 0; i < benchmark_iterations_; ++i) {
+    SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
+                    (uint16_t*)dst_pixels_u_opt, benchmark_width_,
+                    (uint16_t*)dst_pixels_v_opt, benchmark_width_,
+                    benchmark_width_, benchmark_height_, 10);
+  }
+
+  for (int i = 0; i < kPixels * 2; ++i) {
+    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
+    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
+  }
+  free_aligned_buffer_page_end(src_pixels);
+  free_aligned_buffer_page_end(dst_pixels_u_c);
+  free_aligned_buffer_page_end(dst_pixels_v_c);
+  free_aligned_buffer_page_end(dst_pixels_u_opt);
+  free_aligned_buffer_page_end(dst_pixels_v_opt);
+}
+
 TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
   // Round count up to multiple of 16
   const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;