diff --git a/README.chromium b/README.chromium
index 2db963548..0ee865232 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 435
+Version: 437
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 89e35a608..017a336cc 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -843,6 +843,14 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_frame, int width);
+void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_frame, int width);
+void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_frame, int width);
 void I422ToYUY2Row_NEON(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
@@ -851,6 +859,14 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_frame, int width);
+void I422ToYUY2Row_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_frame, int width);
+void I422ToUYVYRow_Any_NEON(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_frame, int width);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 3a3ad4182..2307eb5ed 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 435
+#define LIBYUV_VERSION 437
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 474fea56f..05ab3cc09 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -281,15 +281,20 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
                  const uint8* src_v,
                  uint8* dst_frame, int width) = I422ToYUY2Row_C;
 #if defined(HAS_I422TOYUY2ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 16) &&
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
       IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
-    I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    }
   }
 #elif defined(HAS_I422TOYUY2ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    I422ToYUY2Row = I422ToYUY2Row_NEON;
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_NEON;
+    }
   }
 #endif
 
@@ -323,15 +328,20 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
                  const uint8* src_v,
                  uint8* dst_frame, int width) = I422ToYUY2Row_C;
 #if defined(HAS_I422TOYUY2ROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 16) &&
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
       IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
-    I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_SSE2;
+    }
   }
 #elif defined(HAS_I422TOYUY2ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    I422ToYUY2Row = I422ToYUY2Row_NEON;
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToYUY2Row = I422ToYUY2Row_NEON;
+    }
   }
 #endif
 
@@ -371,15 +381,20 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
                  const uint8* src_v,
                  uint8* dst_frame, int width) = I422ToUYVYRow_C;
 #if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 16) &&
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
       IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
-    I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    }
   }
 #elif defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    I422ToUYVYRow = I422ToUYVYRow_NEON;
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_NEON;
+    }
   }
 #endif
 
@@ -413,15 +428,20 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
                  const uint8* src_v,
                  uint8* dst_frame, int width) = I422ToUYVYRow_C;
 #if defined(HAS_I422TOUYVYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 16) &&
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
       IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
       IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
-    I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_SSE2;
+    }
   }
 #elif defined(HAS_I422TOUYVYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    I422ToUYVYRow = I422ToUYVYRow_NEON;
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
+    I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToUYVYRow = I422ToUYVYRow_NEON;
+    }
   }
 #endif
 
diff --git a/source/row_common.cc b/source/row_common.cc
index cf826ea58..7ee305c56 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1000,22 +1000,53 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
 }
 
 // YUV to RGB does multiple of 8 with SIMD and remainder with C.
-#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP) \
+#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
     void NAMEANY(const uint8* y_buf, \
                  const uint8* u_buf, \
                  const uint8* v_buf, \
                  uint8* rgb_buf, \
                  int width) { \
-      int n = width & ~7; \
+      int n = width & ~MASK; \
      I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \
       I420TORGB_C(y_buf + n, \
                   u_buf + (n >> UV_SHIFT), \
                   v_buf + (n >> UV_SHIFT), \
-                  rgb_buf + n * BPP, width & 7); \
+                  rgb_buf + n * BPP, width & MASK); \
     }
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
+     0, 4, 7)
+YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
+     1, 4, 7)
+YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
+     2, 4, 7)
+YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
+     1, 4, 7)
+YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
+     1, 4, 7)
+YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
+     1, 4, 7)
+// I422ToRGB24Row_SSSE3 is unaligned.
+YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
+YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
+YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
+YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
+#endif  // HAS_I422TOARGBROW_SSSE3
+#ifdef HAS_I422TOARGBROW_NEON
+YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
+YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
+YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
+YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
+YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
+YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
+YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
+YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
+#endif  // HAS_I422TOARGBROW_NEON
+#undef YANY
+
 // Wrappers to handle odd width
-#define Y2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
+#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
     void NAMEANY(const uint8* y_buf, \
                  const uint8* uv_buf, \
                  uint8* rgb_buf, \
@@ -1028,37 +1059,16 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
 }
 
 #ifdef HAS_I422TOARGBROW_SSSE3
-YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
+NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
      0, 4)
-YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
-     1, 4)
-YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
-     2, 4)
-Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
+NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
      0, 4)
-Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
-     0, 4)
-YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
-     1, 4)
-YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
-     1, 4)
-YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
-     1, 4)
-// I422ToRGB24Row_SSSE3 is unaligned.
-YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3)
-YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3)
 #endif  // HAS_I422TOARGBROW_SSSE3
 #ifdef HAS_I422TOARGBROW_NEON
-YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4)
-YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4)
-YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4)
-YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4)
-Y2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
-Y2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
-YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3)
-YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3)
+NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
+NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
 #endif  // HAS_I422TOARGBROW_NEON
-#undef YANY
+#undef NV2NY
 
 // RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
 // SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
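
The six-argument YANY above is worth seeing expanded once. The sketch below is roughly what YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15) generates (reformatted and commented here for readability; the real expansion is a single preprocessor substitution):

void I422ToYUY2Row_Any_SSE2(const uint8* y_buf,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
                            int width) {
  int n = width & ~15;               // MASK 15: bulk is a multiple of 16.
  I422ToYUY2Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, n);
  I422ToYUY2Row_C(y_buf + n,
                  u_buf + (n >> 1),  // UV_SHIFT 1: chroma is half width.
                  v_buf + (n >> 1),
                  rgb_buf + n * 2,   // BPP 2: YUY2 packs 2 bytes per pixel.
                  width & 15);       // Remaining 0..15 pixels finish in C.
}

Because n is always a multiple of 16, n >> 1 lands on a whole chroma sample, so the C tail starts exactly where the SIMD bulk stopped.
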
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 2a4d6b3a1..9b1933d45 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -974,7 +974,7 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
     "+r"(dst_rgb565),  // %1
     "+r"(pix)          // %2
   :
-  : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "q8", "q9", "q10"
+  : "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10"
   );
 }
 #endif  // HAS_ARGBTORGB565ROW_NEON
diff --git a/source/scale_neon.cc b/source/scale_neon.cc
index a1946f051..4af3c1554 100644
--- a/source/scale_neon.cc
+++ b/source/scale_neon.cc
@@ -29,7 +29,7 @@ extern "C" {
 void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
                         uint8* dst, int dst_width) {
   asm volatile (
-    "1:                                        \n"
+  "1:                                          \n"
     // load even pixels into q0, odd into q1
     "vld2.u8    {q0,q1}, [%0]!                 \n"
     "vst1.u8    {q0}, [%1]!                    \n"  // store even pixels
@@ -48,7 +48,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
   asm volatile (
     // change the stride to row 2 pointer
     "add        %1, %0                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld1.u8    {q0,q1}, [%0]!                 \n"  // load row 1 and post inc
     "vld1.u8    {q2,q3}, [%1]!                 \n"  // load row 2 and post inc
     "vpaddl.u8  q0, q0                         \n"  // row 1 add adjacent
@@ -72,7 +72,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
 void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
                         uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "1:                                        \n"
+  "1:                                          \n"
     "vld2.u8    {d0, d1}, [%0]!                \n"
     "vtrn.u8    d1, d0                         \n"
     "vshrn.u16  d0, q0, #8                     \n"
@@ -93,7 +93,7 @@ void ScaleRowDown4Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
     "add        r4, %0, %3                     \n"
     "add        r5, r4, %3                     \n"
     "add        %3, r5, %3                     \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld1.u8    {q0}, [%0]!                    \n"  // load up 16x4
     "vld1.u8    {q1}, [r4]!                    \n"
     "vld1.u8    {q2}, [r5]!                    \n"
@@ -123,7 +123,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
                          ptrdiff_t /* src_stride */,
                          uint8* dst_ptr, int dst_width) {
   asm volatile (
-    "1:                                        \n"
+  "1:                                          \n"
     "vld4.u8    {d0, d1, d2, d3}, [%0]!        \n"  // src line 0
     "vmov       d2, d3                         \n"  // order d0, d1, d2
     "vst3.u8    {d0, d1, d2}, [%1]!            \n"
@@ -143,7 +143,7 @@ void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr,
   asm volatile (
     "vmov.u8    d24, #3                        \n"
     "add        %3, %0                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld4.u8    {d0, d1, d2, d3}, [%0]!        \n"  // src line 0
     "vld4.u8    {d4, d5, d6, d7}, [%3]!        \n"  // src line 1
 
@@ -199,7 +199,7 @@ void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr,
   asm volatile (
     "vmov.u8    d24, #3                        \n"
     "add        %3, %0                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld4.u8    {d0, d1, d2, d3}, [%0]!        \n"  // src line 0
     "vld4.u8    {d4, d5, d6, d7}, [%3]!        \n"  // src line 1
 
@@ -251,7 +251,7 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
                          uint8* dst_ptr, int dst_width) {
   asm volatile (
     "vld1.u8    {q3}, [%3]                     \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld1.u8    {d0, d1, d2, d3}, [%0]!        \n"
     "vtbl.u8    d4, {d0, d1, d2, d3}, d6       \n"
     "vtbl.u8    d5, {d0, d1, d2, d3}, d7       \n"
@@ -277,7 +277,7 @@ void OMITFP ScaleRowDown38_3_Int_NEON(const uint8* src_ptr,
     "vld1.u8    {q15}, [%6]                    \n"
     "add        r4, %0, %3, lsl #1             \n"
     "add        %3, %0                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
 
     // d0 = 00 40 01 41 02 42 03 43
     // d1 = 10 50 11 51 12 52 13 53
@@ -385,7 +385,7 @@ void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr,
     "vld1.u16   {q13}, [%4]                    \n"
     "vld1.u8    {q14}, [%5]                    \n"
     "add        %3, %0                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
 
    // d0 = 00 40 01 41 02 42 03 43
    // d1 = 10 50 11 51 12 52 13 53
@@ -485,7 +485,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
     "vdup.8     d5, %4                         \n"
     "rsb        %4, #256                       \n"
     "vdup.8     d4, %4                         \n"
-    "1:                                        \n"
+  "1:                                          \n"
     "vld1.u8    {q0}, [%1]!                    \n"
     "vld1.u8    {q1}, [%2]!                    \n"
     "subs       %3, #16                        \n"
@@ -499,21 +499,21 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
     "bgt        1b                             \n"
     "b          4f                             \n"
 
-    "2:                                        \n"
+  "2:                                          \n"
     "vld1.u8    {q0}, [%1]!                    \n"
     "subs       %3, #16                        \n"
     "vst1.u8    {q0}, [%0]!                    \n"
     "bgt        2b                             \n"
     "b          4f                             \n"
 
-    "3:                                        \n"
+  "3:                                          \n"
     "vld1.u8    {q0}, [%1]!                    \n"
     "vld1.u8    {q1}, [%2]!                    \n"
     "subs       %3, #16                        \n"
     "vrhadd.u8  q0, q1                         \n"
     "vst1.u8    {q0}, [%0]!                    \n"
     "bgt        3b                             \n"
-    "4:                                        \n"
+  "4:                                          \n"
     "vst1.u8    {d1[7]}, [%0]                  \n"
   : "+r"(dst_ptr),   // %0
     "+r"(src_ptr),   // %1
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index c81a7ab3b..1517be821 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -238,15 +238,15 @@ TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
 TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
 
 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                       W1280, DIFF, N, NEG) \
+                       W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
   const int kWidth = W1280; \
   const int kHeight = 720; \
   const int kStrideB = ((kWidth * 8 * BPP_B + 7) / 8 + ALIGN - 1) / \
                        ALIGN * ALIGN; \
-  align_buffer_16(src_y, kWidth * kHeight); \
-  align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
-  align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
+  align_buffer_16(src_y, kWidth * kHeight + OFF); \
+  align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y + OFF); \
+  align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y + OFF); \
   align_buffer_16(dst_argb_c, kStrideB * kHeight); \
   align_buffer_16(dst_argb_opt, kStrideB * kHeight); \
   memset(dst_argb_c, 0, kStrideB * kHeight); \
@@ -254,26 +254,26 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
   srandom(time(NULL)); \
   for (int i = 0; i < kHeight; ++i) { \
     for (int j = 0; j < kWidth; ++j) { \
-      src_y[(i * kWidth) + j] = (random() & 0xff); \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
     } \
   } \
   for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \
     for (int j = 0; j < kWidth / SUBSAMP_X; ++j) { \
-      src_u[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \
-      src_v[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \
+      src_u[(i * kWidth / SUBSAMP_X) + j + OFF] = (random() & 0xff); \
+      src_v[(i * kWidth / SUBSAMP_X) + j + OFF] = (random() & 0xff); \
     } \
   } \
   MaskCpuFlags(0); \
-  FMT_PLANAR##To##FMT_B(src_y, kWidth, \
-                        src_u, kWidth / SUBSAMP_X, \
-                        src_v, kWidth / SUBSAMP_X, \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_u + OFF, kWidth / SUBSAMP_X, \
+                        src_v + OFF, kWidth / SUBSAMP_X, \
                         dst_argb_c, kStrideB, \
                         kWidth, NEG kHeight); \
   MaskCpuFlags(-1); \
   for (int i = 0; i < benchmark_iterations_; ++i) { \
-    FMT_PLANAR##To##FMT_B(src_y, kWidth, \
-                          src_u, kWidth / SUBSAMP_X, \
-                          src_v, kWidth / SUBSAMP_X, \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_u + OFF, kWidth / SUBSAMP_X, \
+                          src_v + OFF, kWidth / SUBSAMP_X, \
                           dst_argb_opt, kStrideB, \
                           kWidth, NEG kHeight); \
   } \
@@ -313,11 +313,13 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
 #define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
                       DIFF) \
   TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                 1280, DIFF, _Opt, +) \
+                 1280, DIFF, _Opt, +, 0) \
   TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                 1280, DIFF, _Invert, -) \
+                 1280, DIFF, _Unaligned, +, 1) \
   TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
-                 1276, DIFF, _Any, +)
+                 1280, DIFF, _Invert, -, 0) \
+  TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                 1276, DIFF, _Any, +, 0)
 
 TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 2)
 TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 2)
@@ -346,32 +348,32 @@ TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 1, 2)
 TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 1, 2)
 
 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
-                         W1280, DIFF, N, NEG) \
+                         W1280, DIFF, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
   const int kWidth = W1280; \
   const int kHeight = 720; \
   const int kStrideB = kWidth * BPP_B; \
-  align_buffer_16(src_y, kWidth * kHeight); \
-  align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2); \
+  align_buffer_16(src_y, kWidth * kHeight + OFF); \
+  align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2 + OFF); \
   align_buffer_16(dst_argb_c, kStrideB * kHeight); \
   align_buffer_16(dst_argb_opt, kStrideB * kHeight); \
   srandom(time(NULL)); \
   for (int i = 0; i < kHeight; ++i) \
     for (int j = 0; j < kWidth; ++j) \
-      src_y[(i * kWidth) + j] = (random() & 0xff); \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
   for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) \
     for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \
-      src_uv[(i * kWidth / SUBSAMP_X) * 2 + j] = (random() & 0xff); \
+      src_uv[(i * kWidth / SUBSAMP_X) * 2 + j + OFF] = (random() & 0xff); \
     } \
   MaskCpuFlags(0); \
-  FMT_PLANAR##To##FMT_B(src_y, kWidth, \
-                        src_uv, kWidth / SUBSAMP_X * 2, \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_uv + OFF, kWidth / SUBSAMP_X * 2, \
                         dst_argb_c, kWidth * BPP_B, \
                         kWidth, NEG kHeight); \
   MaskCpuFlags(-1); \
   for (int i = 0; i < benchmark_iterations_; ++i) { \
-    FMT_PLANAR##To##FMT_B(src_y, kWidth, \
-                          src_uv, kWidth / SUBSAMP_X * 2, \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_uv + OFF, kWidth / SUBSAMP_X * 2, \
                           dst_argb_opt, kWidth * BPP_B, \
                           kWidth, NEG kHeight); \
   } \
@@ -408,11 +410,13 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
 
 #define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
   TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
-                   1280, _Opt, +) \
+                   1280, _Opt, +, 0) \
   TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
-                   1280, _Invert, -) \
+                   1280, _Unaligned, +, 1) \
   TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
-                   1276, _Any, +)
+                   1280, _Invert, -, 0) \
+  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
+                   1276, _Any, +, 0)
 
 TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
 TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
@@ -421,12 +425,12 @@ TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
 // TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
 
 #define TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
-                       W1280, N, NEG) \
+                       W1280, N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
   const int kWidth = W1280; \
   const int kHeight = 720; \
   const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
-  align_buffer_16(src_argb, kStride * kHeight); \
+  align_buffer_16(src_argb, kStride * kHeight + OFF); \
   align_buffer_16(dst_y_c, kWidth * kHeight); \
   align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
   align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
@@ -436,16 +440,16 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
   srandom(time(NULL)); \
   for (int i = 0; i < kHeight; ++i) \
     for (int j = 0; j < kStride; ++j) \
-      src_argb[(i * kStride) + j] = (random() & 0xff); \
+      src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
   MaskCpuFlags(0); \
-  FMT_A##To##FMT_PLANAR(src_argb, kStride, \
+  FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
                         dst_y_c, kWidth, \
                         dst_u_c, kWidth / SUBSAMP_X, \
                         dst_v_c, kWidth / SUBSAMP_X, \
                         kWidth, NEG kHeight); \
   MaskCpuFlags(-1); \
   for (int i = 0; i < benchmark_iterations_; ++i) { \
-    FMT_A##To##FMT_PLANAR(src_argb, kStride, \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
                           dst_y_opt, kWidth, \
                           dst_u_opt, kWidth / SUBSAMP_X, \
                           dst_v_opt, kWidth / SUBSAMP_X, \
@@ -496,11 +500,13 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
 
 #define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
   TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
-                 1280, _Opt, +) \
+                 1280, _Opt, +, 0) \
   TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
-                 1280, _Invert, -) \
+                 1280, _Unaligned, +, 1) \
   TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
-                 1276, _Any, +)
+                 1280, _Invert, -, 0) \
+  TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                 1276, _Any, +, 0)
 
 TESTATOPLANAR(ARGB, 4, I420, 2, 2)
 TESTATOPLANAR(BGRA, 4, I420, 2, 2)
@@ -526,24 +532,25 @@ TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2)
 TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2)
 TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2)
 
-#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, N, NEG) \
+#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, \
+                  N, NEG, OFF) \
 TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
   const int kWidth = W1280; \
   const int kHeight = 720; \
-  align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \
+  align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight + OFF); \
   align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
   align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
   srandom(time(NULL)); \
   for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \
-    src_argb[i] = (random() & 0xff); \
+    src_argb[i + OFF] = (random() & 0xff); \
   } \
   MaskCpuFlags(0); \
-  FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
+  FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
                    dst_argb_c, kWidth * BPP_B, \
                    kWidth, NEG kHeight); \
   MaskCpuFlags(-1); \
   for (int i = 0; i < benchmark_iterations_; ++i) { \
-    FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
+    FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
                      dst_argb_opt, kWidth * BPP_B, \
                      kWidth, NEG kHeight); \
   } \
@@ -562,9 +569,11 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
   free_aligned_buffer_16(dst_argb_opt) \
 }
 #define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, DIFF) \
-  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Opt, +) \
-  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Invert, -) \
-  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Any, +)
+  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Opt, +, 0) \
+  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, \
+            _Unaligned, +, 1) \
+  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Invert, -, 0) \
+  TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Any, +, 0)
 
 TESTATOB(ARGB, 4, 4, ARGB, 4, 0)
 TESTATOB(ARGB, 4, 4, BGRA, 4, 0)
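
Net effect of the convert_from.cc dispatch changes plus the Any wrappers: a width that is at least 16 but not a multiple of 16, such as the 1276 used by the _Any tests, now runs mostly in SIMD instead of falling back to C for the whole row. A small standalone illustration of the split (plain C arithmetic, not libyuv code):

#include <stdio.h>

int main(void) {
  const int kMask = 15;  // MASK for the 16-pixel YUY2/UYVY row functions.
  const int widths[] = { 1276, 1280, 17, 16 };
  for (int i = 0; i < 4; ++i) {
    const int width = widths[i];
    const int bulk = width & ~kMask;  // pixels done 16 at a time in SIMD
    const int tail = width & kMask;   // pixels finished by the C row
    printf("width %4d -> SIMD %4d + C %2d\n", width, bulk, tail);
  }
  return 0;
}

Widths below 16 never reach the wrappers at all: the dispatch guards with width >= 16, so those rows stay entirely in the C implementation.
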