Add support for odd height YUVA alpha blending.

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:527

Review URL: https://codereview.chromium.org/1507683003 .
This commit is contained in:
Frank Barchard 2015-12-07 12:03:20 -08:00
parent b0b22f88b9
commit 2657688e70
4 changed files with 34 additions and 17 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1548
Version: 1549
License: BSD
License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1548
#define LIBYUV_VERSION 1549
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -651,6 +651,8 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
uint8* dst_v, int dst_stride_v,
int width, int height) {
int y;
// Half width/height for UV.
int halfwidth = (width + 1) >> 1;
void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
@ -674,15 +676,11 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
dst_y, dst_stride_y,
width, height);
// Half width/height for UV.
width = (width + 1) >> 1;
height = (height + 1) >> 1;
#if defined(HAS_BLENDPLANEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
// TODO(fbarchard): Implement any versions for odd width.
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
if (IS_ALIGNED(halfwidth, 8)) {
BlendPlaneRow = BlendPlaneRow_SSSE3;
}
}
@ -690,7 +688,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_BLENDPLANEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(halfwidth, 16)) {
BlendPlaneRow = BlendPlaneRow_AVX2;
}
}
@ -698,7 +696,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON;
}
}
@ -706,7 +704,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSE2;
}
}
@ -714,20 +712,24 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
if (IS_ALIGNED(halfwidth, 32)) {
ScaleRowDown2 = ScaleRowDown2Box_AVX2;
}
}
#endif
// Row buffer for intermediate alpha pixels.
align_buffer_64(halfalpha, width);
for (y = 0; y < height; ++y) {
align_buffer_64(halfalpha, halfwidth);
for (y = 0; y < height; y += 2) {
// last row of odd height image use 1 row of alpha instead of 2.
if (y == (height - 1)) {
alpha_stride = 0;
}
// Subsample 2 rows of UV to half width and half height.
ScaleRowDown2(alpha, alpha_stride, halfalpha, width);
ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
alpha += alpha_stride * 2;
BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width);
BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width);
BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
src_u0 += src_stride_u0;
src_u1 += src_stride_u1;
dst_u += dst_stride_u;

View File

@ -1164,7 +1164,6 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
}
#ifdef HAS_BLENDPLANEROW_AVX2
// TODO(fbarchard): Switch to I420Blend.
static void TestBlendPlaneRow(int width, int height, int benchmark_iterations,
int invert, int off) {
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
@ -1348,6 +1347,14 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
TestBlendPlane(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 1);
}
#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
@ -1442,6 +1449,14 @@ TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
TEST_F(LibYUVPlanarTest, I420Blend_Any) {
TestI420Blend(benchmark_width_ - 4, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
}
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
}
TEST_F(LibYUVPlanarTest, TestAffine) {
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);