From 545a51c1d3c0d28a614b224375bbf00edf81c057 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Tue, 3 Dec 2013 00:27:29 +0000 Subject: [PATCH] use scale for subsampling to handle odd source width to even destination width. BUG=289 TEST=drmemory R=nfullagar@google.com, ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/4779004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@884 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/convert.cc | 127 +++++++++----------------------------- source/row_posix.cc | 1 + unit_test/convert_test.cc | 6 +- 5 files changed, 35 insertions(+), 103 deletions(-) diff --git a/README.chromium b/README.chromium index cd77ad11c..41b410ae2 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 883 +Version: 884 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 6c55976af..ce3a723e2 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 883 +#define LIBYUV_VERSION 884 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index e79ff3c0a..4c2a90156 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -58,6 +58,8 @@ int I420Copy(const uint8* src_y, int src_stride_y, return 0; } +// 422 chroma is 1/2 width, 1x height +// 420 chroma is 1/2 width, 1/2 height LIBYUV_API int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, @@ -81,73 +83,29 @@ int I422ToI420(const uint8* src_y, int src_stride_y, src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } - int halfwidth = (width + 1) >> 1; - void (*HalfRow)(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) = HalfRow_C; -#if defined(HAS_HALFROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) && - IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - HalfRow = HalfRow_SSE2; - } -#endif -#if defined(HAS_HALFROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(halfwidth, 32)) { - HalfRow = HalfRow_AVX2; - } -#endif -#if defined(HAS_HALFROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { - HalfRow = HalfRow_NEON; - } -#endif // Copy Y plane if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } - // SubSample U plane. - int y; - for (y = 0; y < height - 1; y += 2) { - HalfRow(src_u, src_stride_u, dst_u, halfwidth); - src_u += src_stride_u * 2; - dst_u += dst_stride_u; - } - if (height & 1) { - HalfRow(src_u, 0, dst_u, halfwidth); - } + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; - // SubSample V plane. - for (y = 0; y < height - 1; y += 2) { - HalfRow(src_v, src_stride_v, dst_v, halfwidth); - src_v += src_stride_v * 2; - dst_v += dst_stride_v; - } - if (height & 1) { - HalfRow(src_v, 0, dst_v, halfwidth); - } + // Resample U plane. + ScalePlane(src_u, src_stride_u, halfwidth, height, + dst_u, dst_stride_u, halfwidth, halfheight, + kFilterBilinear); + + // Resample V plane. + ScalePlane(src_v, src_stride_v, halfwidth, height, + dst_v, dst_stride_v, halfwidth, halfheight, + kFilterBilinear); return 0; } -// Blends 32x2 pixels to 16x1 -// source in scale.cc -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_SCALEROWDOWN2_NEON -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -#elif !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) - -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -#endif -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - +// 444 chroma is 1x width, 1x height +// 420 chroma is 1/2 width, 1/2 height LIBYUV_API int I444ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, @@ -171,54 +129,27 @@ int I444ToI420(const uint8* src_y, int src_stride_y, src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } - int halfwidth = (width + 1) >> 1; - void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C; -#if defined(HAS_SCALEROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && - IS_ALIGNED(halfwidth, 16)) { - ScaleRowDown2 = ScaleRowDown2Box_NEON; - } -#elif defined(HAS_SCALEROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(halfwidth, 16) && - IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - ScaleRowDown2 = ScaleRowDown2Box_SSE2; - } -#endif // Copy Y plane if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } - // SubSample U plane. - int y; - for (y = 0; y < height - 1; y += 2) { - ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth); - src_u += src_stride_u * 2; - dst_u += dst_stride_u; - } - if (height & 1) { - ScaleRowDown2(src_u, 0, dst_u, halfwidth); - } + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; - // SubSample V plane. - for (y = 0; y < height - 1; y += 2) { - ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth); - src_v += src_stride_v * 2; - dst_v += dst_stride_v; - } - if (height & 1) { - ScaleRowDown2(src_v, 0, dst_v, halfwidth); - } + // Resample U plane. + ScalePlane(src_u, src_stride_u, width, height, + dst_u, dst_stride_u, halfwidth, halfheight, + kFilterBilinear); + + // Resample V plane. + ScalePlane(src_v, src_stride_v, width, height, + dst_v, dst_stride_v, halfwidth, halfheight, + kFilterBilinear); return 0; } -// TODO(fbarchard): Enable bilinear when fast enough or specialized upsampler. // 411 chroma is 1/4 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API @@ -254,15 +185,15 @@ int I411ToI420(const uint8* src_y, int src_stride_y, int halfheight = (height + 1) >> 1; int quarterwidth = (width + 3) >> 2; - // Resample U plane from 1/4 width, 1x height to 1/2 width, 1/2 height. + // Resample U plane. ScalePlane(src_u, src_stride_u, quarterwidth, height, dst_u, dst_stride_u, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); // Resample V plane. ScalePlane(src_v, src_stride_v, quarterwidth, height, dst_v, dst_stride_v, halfwidth, halfheight, - kFilterNone); + kFilterBilinear); return 0; } diff --git a/source/row_posix.cc b/source/row_posix.cc index 475d2f815..5913de778 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -5389,6 +5389,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, // 1 pixel loop \n" ".p2align 2 \n" + BUNDLEALIGN "10: \n" "cvttps2dq %%xmm2,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 0e5fab3ca..f35542531 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -106,7 +106,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ } \ } \ } \ - EXPECT_LE(max_diff, 1); \ + EXPECT_LE(max_diff, 0); \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ int abs_diff = \ @@ -119,7 +119,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ } \ } \ } \ - EXPECT_LE(max_diff, 1); \ + EXPECT_LE(max_diff, 3); \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ int abs_diff = \ @@ -132,7 +132,7 @@ TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ } \ } \ } \ - EXPECT_LE(max_diff, 1); \ + EXPECT_LE(max_diff, 3); \ free_aligned_buffer_64(dst_y_c) \ free_aligned_buffer_64(dst_u_c) \ free_aligned_buffer_64(dst_v_c) \