diff --git a/README.chromium b/README.chromium index 41b410ae2..c3d58b987 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 884 +Version: 885 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index be3bba444..f0343a77d 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -1,168 +1,168 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy ARGB to ARGB. -#define ARGBToARGB ARGBCopy -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To BGRA. (alias) -#define ARGBToBGRA BGRAToARGB -LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To ABGR. (alias) -#define ARGBToABGR ABGRToARGB -LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGBA. -LIBYUV_API -int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGB24. -LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); - -// Convert ARGB To RAW. -LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); - -// Convert ARGB To RGB565. -LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// Convert ARGB To ARGB1555. -LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); - -// Convert ARGB To ARGB4444. -LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); - -// Convert ARGB To I444. -LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I422. -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I420. (also in convert.h) -LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J420. (JPeg full range I420). 
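
Aside on usage: the planar converters above all share the same shape, so one minimal caller sketch for ARGBToI420 covers them. This sketch is illustrative and not part of the patch; the buffer names and the return-zero-on-success check are assumptions about the caller, not this header.

    // Minimal sketch, assuming libyuv's convention that converters return 0
    // on success. I420 is one full-size Y plane plus quarter-size U/V planes.
    #include <vector>
    #include "libyuv/convert_from_argb.h"

    bool ArgbFrameToI420(const uint8* argb, int width, int height) {
      int half_width = (width + 1) / 2;
      int half_height = (height + 1) / 2;
      std::vector<uint8> y(width * height);
      std::vector<uint8> u(half_width * half_height);
      std::vector<uint8> v(half_width * half_height);
      // Strides are in bytes: 4 per ARGB pixel, 1 per sample in each plane.
      return libyuv::ARGBToI420(argb, width * 4,
                                &y[0], width,
                                &u[0], half_width,
                                &v[0], half_width,
                                width, height) == 0;
    }
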
-LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I411. -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J400. (JPeg full range). -LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); - -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Convert ARGB To NV12. -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); - -// Convert ARGB To UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT +#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// Copy ARGB to ARGB. +#define ARGBToARGB ARGBCopy +LIBYUV_API +int ARGBCopy(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To BGRA. (alias) +#define ARGBToBGRA BGRAToARGB +LIBYUV_API +int BGRAToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To ABGR. (alias) +#define ARGBToABGR ABGRToARGB +LIBYUV_API +int ABGRToARGB(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To RGBA. +LIBYUV_API +int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To RGB24. +LIBYUV_API +int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb24, int dst_stride_rgb24, + int width, int height); + +// Convert ARGB To RAW. 
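
A related convention, exercised later in this patch by the _Invert test variants (which pass NEG kHeight): a negative height flips the image vertically. A sketch, with illustrative names, using the ARGBToRGB24 declaration above:

    // Sketch: convert ARGB to RGB24 while flipping vertically by passing a
    // negative height. Function name and buffers are illustrative.
    #include "libyuv/convert_from_argb.h"

    void ArgbToRgb24Flipped(const uint8* argb, uint8* rgb24,
                            int width, int height) {
      libyuv::ARGBToRGB24(argb, width * 4,    // 4 bytes per ARGB pixel.
                          rgb24, width * 3,   // 3 bytes per RGB24 pixel.
                          width, -height);    // Negative height flips.
    }
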
+LIBYUV_API +int ARGBToRAW(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb, int dst_stride_rgb, + int width, int height); + +// Convert ARGB To RGB565. +LIBYUV_API +int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb565, int dst_stride_rgb565, + int width, int height); + +// Convert ARGB To ARGB1555. +LIBYUV_API +int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height); + +// Convert ARGB To ARGB4444. +LIBYUV_API +int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height); + +// Convert ARGB To I444. +LIBYUV_API +int ARGBToI444(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I422. +LIBYUV_API +int ARGBToI422(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I420. (also in convert.h) +LIBYUV_API +int ARGBToI420(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB to J420. (JPeg full range I420). +LIBYUV_API +int ARGBToJ420(const uint8* src_argb, int src_stride_argb, + uint8* dst_yj, int dst_stride_yj, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB To I411. +LIBYUV_API +int ARGBToI411(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert ARGB to J400. (JPeg full range). +LIBYUV_API +int ARGBToJ400(const uint8* src_argb, int src_stride_argb, + uint8* dst_yj, int dst_stride_yj, + int width, int height); + +// Convert ARGB to I400. +LIBYUV_API +int ARGBToI400(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + int width, int height); + +// Convert ARGB To NV12. +LIBYUV_API +int ARGBToNV12(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_uv, int dst_stride_uv, + int width, int height); + +// Convert ARGB To NV21. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To NV21. +LIBYUV_API +int ARGBToNV21(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + uint8* dst_vu, int dst_stride_vu, + int width, int height); + +// Convert ARGB To YUY2. +LIBYUV_API +int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, + uint8* dst_yuy2, int dst_stride_yuy2, + int width, int height); + +// Convert ARGB To UYVY. 
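
YUY2 and UYVY are packed 4:2:2 formats at 2 bytes per pixel, so a single destination buffer of width * 2 bytes per row suffices. A sizing sketch (illustrative, not part of the patch; assumes an even width so U/V pairs are not split):

    #include <vector>
    #include "libyuv/convert_from_argb.h"

    std::vector<uint8> ArgbToUyvy(const uint8* argb, int width, int height) {
      // Packed 4:2:2: each 2-pixel group is U0 Y0 V0 Y1, i.e. 2 bytes/pixel.
      std::vector<uint8> uyvy(width * 2 * height);
      libyuv::ARGBToUYVY(argb, width * 4, &uyvy[0], width * 2, width, height);
      return uyvy;
    }
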
+LIBYUV_API
+int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
+               uint8* dst_uyvy, int dst_stride_uyvy,
+               int width, int height);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_  NOLINT
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index ce3a723e2..f14bdbf6a 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 884
+#define LIBYUV_VERSION 885
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/compare_common.cc b/source/compare_common.cc
index ab587d081..3e4c77a67 100644
--- a/source/compare_common.cc
+++ b/source/compare_common.cc
@@ -1,40 +1,40 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
-  uint32 sse = 0u;
-  for (int i = 0; i < count; ++i) {
-    int diff = src_a[i] - src_b[i];
-    sse += static_cast<uint32>(diff * diff);
-  }
-  return sse;
-}
-
-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
-  uint32 hash = seed;
-  for (int i = 0; i < count; ++i) {
-    hash += (hash << 5) + src[i];
-  }
-  return hash;
-}
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
+/*
+ *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
+  uint32 sse = 0u;
+  for (int i = 0; i < count; ++i) {
+    int diff = src_a[i] - src_b[i];
+    sse += static_cast<uint32>(diff * diff);
+  }
+  return sse;
+}
+
+// hash seed of 5381 recommended.
+// Internal C version of HashDjb2 with int sized count for efficiency.
+uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
+  uint32 hash = seed;
+  for (int i = 0; i < count; ++i) {
+    hash += (hash << 5) + src[i];
+  }
+  return hash;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
diff --git a/source/compare_neon.cc b/source/compare_neon.cc
index a4e777506..c377c1634 100644
--- a/source/compare_neon.cc
+++ b/source/compare_neon.cc
@@ -1,61 +1,61 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q11, #0 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" - "vld1.8 {q1}, [%1]! \n" - "subs %2, %2, #16 \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q8, d4, d4 \n" - "vmlal.s16 q9, d6, d6 \n" - "vmlal.s16 q10, d5, d5 \n" - "vmlal.s16 q11, d7, d7 \n" - "bgt 1b \n" - - "vadd.u32 q8, q8, q9 \n" - "vadd.u32 q10, q10, q11 \n" - "vadd.u32 q11, q8, q10 \n" - "vpaddl.u32 q1, q11 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); - return sse; -} - -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) + +uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { + volatile uint32 sse; + asm volatile ( + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q11, #0 \n" + + ".p2align 2 \n" + "1: \n" + "vld1.8 {q0}, [%0]! \n" + "vld1.8 {q1}, [%1]! \n" + "subs %2, %2, #16 \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q8, d4, d4 \n" + "vmlal.s16 q9, d6, d6 \n" + "vmlal.s16 q10, d5, d5 \n" + "vmlal.s16 q11, d7, d7 \n" + "bgt 1b \n" + + "vadd.u32 q8, q8, q9 \n" + "vadd.u32 q10, q10, q11 \n" + "vadd.u32 q11, q8, q10 \n" + "vpaddl.u32 q1, q11 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), + "+r"(src_b), + "+r"(count), + "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); + return sse; +} + +#endif // __ARM_NEON__ + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/source/compare_posix.cc b/source/compare_posix.cc index bb7902dbf..1e0ba8fe1 100644 --- a/source/compare_posix.cc +++ b/source/compare_posix.cc @@ -1,166 +1,166 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -#if defined(__native_client__) && defined(__x86_64__) -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#else -#define MEMACCESS(base) "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#endif - -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( // NOLINT - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - ".p2align 2 \n" - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "jg 1b \n" - - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" - - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); // NOLINT - return sse; -} - -#endif // defined(__x86_64__) || defined(__i386__) - -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( // NOLINT - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - ".p2align 2 \n" - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "sub $0x10,%1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - 
"m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); // NOLINT - return hash; -} -#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" +#include "libyuv/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) + +#if defined(__native_client__) && defined(__x86_64__) +#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" +#define MEMLEA(offset, base) #offset "(%q" #base ")" +#else +#define MEMACCESS(base) "(%" #base ")" +#define MEMLEA(offset, base) #offset "(%" #base ")" +#endif + +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + uint32 sse; + asm volatile ( // NOLINT + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + ".p2align 2 \n" + "1: \n" + "movdqa " MEMACCESS(0) ",%%xmm1 \n" + "lea " MEMLEA(0x10, 0) ",%0 \n" + "movdqa " MEMACCESS(1) ",%%xmm2 \n" + "lea " MEMLEA(0x10, 1) ",%1 \n" + "sub $0x10,%2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "jg 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" +#endif + ); // NOLINT + return sse; +} + +#endif // defined(__x86_64__) || defined(__i386__) + +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) +#define HAS_HASHDJB2_SSE41 +static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +static uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +static uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +static uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +static uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; + +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + uint32 hash; + asm volatile ( // NOLINT + "movd %2,%%xmm0 \n" + "pxor %%xmm7,%%xmm7 \n" + "movdqa %4,%%xmm6 \n" + ".p2align 2 \n" + "1: \n" + "movdqu " MEMACCESS(0) ",%%xmm1 \n" + "lea " MEMLEA(0x10, 0) ",%0 \n" + "pmulld %%xmm6,%%xmm0 \n" + "movdqa %5,%%xmm5 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm7,%%xmm3 \n" + "pmulld 
%%xmm5,%%xmm3 \n" + "movdqa %6,%%xmm5 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpckhwd %%xmm7,%%xmm4 \n" + "pmulld %%xmm5,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm7,%%xmm2 \n" + "pmulld %%xmm5,%%xmm2 \n" + "movdqa %8,%%xmm5 \n" + "punpckhwd %%xmm7,%%xmm1 \n" + "pmulld %%xmm5,%%xmm1 \n" + "paddd %%xmm4,%%xmm3 \n" + "paddd %%xmm2,%%xmm1 \n" + "sub $0x10,%1 \n" + "paddd %%xmm3,%%xmm1 \n" + "pshufd $0xe,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "pshufd $0x1,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "jg 1b \n" + "movd %%xmm0,%3 \n" + : "+r"(src), // %0 + "+r"(count), // %1 + "+rm"(seed), // %2 + "=g"(hash) // %3 + : "m"(kHash16x33), // %4 + "m"(kHashMul0), // %5 + "m"(kHashMul1), // %6 + "m"(kHashMul2), // %7 + "m"(kHashMul3) // %8 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); // NOLINT + return hash; +} +#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + diff --git a/source/compare_win.cc b/source/compare_win.cc index ab279777e..99831651f 100644 --- a/source/compare_win.cc +++ b/source/compare_win.cc @@ -1,232 +1,232 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - pxor xmm0, xmm0 - pxor xmm5, xmm5 - - align 4 - wloop: - movdqa xmm1, [eax] - lea eax, [eax + 16] - movdqa xmm2, [edx] - lea edx, [edx + 16] - sub ecx, 16 - movdqa xmm3, xmm1 // abs trick - psubusb xmm1, xmm2 - psubusb xmm2, xmm3 - por xmm1, xmm2 - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm0, xmm1 - paddd xmm0, xmm2 - jg wloop - - pshufd xmm1, xmm0, 0xee - paddd xmm0, xmm1 - pshufd xmm1, xmm0, 0x01 - paddd xmm0, xmm1 - movd eax, xmm0 - ret - } -} - -// Visual C 2012 required for AVX2. -#if _MSC_VER >= 1700 -// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. -#pragma warning(disable: 4752) -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - vpxor ymm0, ymm0, ymm0 // sum - vpxor ymm5, ymm5, ymm5 // constant 0 for unpck - sub edx, eax - - align 4 - wloop: - vmovdqu ymm1, [eax] - vmovdqu ymm2, [eax + edx] - lea eax, [eax + 32] - sub ecx, 32 - vpsubusb ymm3, ymm1, ymm2 // abs difference trick - vpsubusb ymm2, ymm2, ymm1 - vpor ymm1, ymm2, ymm3 - vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. - vpunpckhbw ymm1, ymm1, ymm5 - vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. 
- vpmaddwd ymm1, ymm1, ymm1 - vpaddd ymm0, ymm0, ymm1 - vpaddd ymm0, ymm0, ymm2 - jg wloop - - vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpermq ymm1, ymm0, 0x02 // high + low lane. - vpaddd ymm0, ymm0, ymm1 - vmovd eax, xmm0 - vzeroupper - ret - } -} -#endif // _MSC_VER >= 1700 - -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 -// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 -// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 -// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 -// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 -#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ - _asm _emit 0x40 _asm _emit reg - -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - - pxor xmm7, xmm7 // constant 0 for unpck - movdqa xmm6, kHash16x33 - - align 4 - wloop: - movdqu xmm1, [eax] // src[0-15] - lea eax, [eax + 16] - pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 - movdqa xmm5, kHashMul0 - movdqa xmm2, xmm1 - punpcklbw xmm2, xmm7 // src[0-7] - movdqa xmm3, xmm2 - punpcklwd xmm3, xmm7 // src[0-3] - pmulld(0xdd) // pmulld xmm3, xmm5 - movdqa xmm5, kHashMul1 - movdqa xmm4, xmm2 - punpckhwd xmm4, xmm7 // src[4-7] - pmulld(0xe5) // pmulld xmm4, xmm5 - movdqa xmm5, kHashMul2 - punpckhbw xmm1, xmm7 // src[8-15] - movdqa xmm2, xmm1 - punpcklwd xmm2, xmm7 // src[8-11] - pmulld(0xd5) // pmulld xmm2, xmm5 - movdqa xmm5, kHashMul3 - punpckhwd xmm1, xmm7 // src[12-15] - pmulld(0xcd) // pmulld xmm1, xmm5 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} - -// Visual C 2012 required for AVX2. 
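
A note on the "abs trick" used by SumSquareError_SSE2 and SumSquareError_AVX2 above: there is no packed unsigned byte subtraction that yields signed per-byte differences, so the code saturating-subtracts in both directions and ORs the results, which gives |a - b| per byte (exactly one side is nonzero). A scalar sketch of the same computation, with an illustrative helper name:

    // What psubusb + psubusb + por compute per byte, before pmaddwd squares
    // and pairwise-adds the widened differences into 32-bit accumulators.
    typedef unsigned char uint8;
    typedef unsigned int uint32;

    uint32 SumSquareError_Scalar(const uint8* a, const uint8* b, int count) {
      uint32 sse = 0u;
      for (int i = 0; i < count; ++i) {
        // Saturating subtract both ways; exactly one result is nonzero.
        uint8 d0 = (a[i] > b[i]) ? static_cast<uint8>(a[i] - b[i]) : 0;
        uint8 d1 = (b[i] > a[i]) ? static_cast<uint8>(b[i] - a[i]) : 0;
        uint32 diff = d0 | d1;  // == |a[i] - b[i]|.
        sse += diff * diff;
      }
      return sse;
    }
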
-#if _MSC_VER >= 1700 -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - movdqa xmm6, kHash16x33 - - align 4 - wloop: - vpmovzxbd xmm3, dword ptr [eax] // src[0-3] - pmulld xmm0, xmm6 // hash *= 33 ^ 16 - vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] - pmulld xmm3, kHashMul0 - vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] - pmulld xmm4, kHashMul1 - vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] - pmulld xmm2, kHashMul2 - lea eax, [eax + 16] - pmulld xmm1, kHashMul3 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} -#endif // _MSC_VER >= 1700 - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif +/* + * Copyright 2012 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "libyuv/basic_types.h" +#include "libyuv/row.h" + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) + +__declspec(naked) __declspec(align(16)) +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + pxor xmm0, xmm0 + pxor xmm5, xmm5 + + align 4 + wloop: + movdqa xmm1, [eax] + lea eax, [eax + 16] + movdqa xmm2, [edx] + lea edx, [edx + 16] + sub ecx, 16 + movdqa xmm3, xmm1 // abs trick + psubusb xmm1, xmm2 + psubusb xmm2, xmm3 + por xmm1, xmm2 + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm5 + punpckhbw xmm2, xmm5 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm0, xmm1 + paddd xmm0, xmm2 + jg wloop + + pshufd xmm1, xmm0, 0xee + paddd xmm0, xmm1 + pshufd xmm1, xmm0, 0x01 + paddd xmm0, xmm1 + movd eax, xmm0 + ret + } +} + +// Visual C 2012 required for AVX2. +#if _MSC_VER >= 1700 +// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. +#pragma warning(disable: 4752) +__declspec(naked) __declspec(align(16)) +uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + vpxor ymm0, ymm0, ymm0 // sum + vpxor ymm5, ymm5, ymm5 // constant 0 for unpck + sub edx, eax + + align 4 + wloop: + vmovdqu ymm1, [eax] + vmovdqu ymm2, [eax + edx] + lea eax, [eax + 32] + sub ecx, 32 + vpsubusb ymm3, ymm1, ymm2 // abs difference trick + vpsubusb ymm2, ymm2, ymm1 + vpor ymm1, ymm2, ymm3 + vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. + vpunpckhbw ymm1, ymm1, ymm5 + vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. + vpmaddwd ymm1, ymm1, ymm1 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm0, ymm0, ymm2 + jg wloop + + vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. + vpaddd ymm0, ymm0, ymm1 + vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. 
+ vpaddd ymm0, ymm0, ymm1 + vpermq ymm1, ymm0, 0x02 // high + low lane. + vpaddd ymm0, ymm0, ymm1 + vmovd eax, xmm0 + vzeroupper + ret + } +} +#endif // _MSC_VER >= 1700 + +#define HAS_HASHDJB2_SSE41 +static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +static uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +static uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +static uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +static uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; + +// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 +// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 +// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 +// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 +// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 +#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ + _asm _emit 0x40 _asm _emit reg + +__declspec(naked) __declspec(align(16)) +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + __asm { + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count + movd xmm0, [esp + 12] // seed + + pxor xmm7, xmm7 // constant 0 for unpck + movdqa xmm6, kHash16x33 + + align 4 + wloop: + movdqu xmm1, [eax] // src[0-15] + lea eax, [eax + 16] + pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 + movdqa xmm5, kHashMul0 + movdqa xmm2, xmm1 + punpcklbw xmm2, xmm7 // src[0-7] + movdqa xmm3, xmm2 + punpcklwd xmm3, xmm7 // src[0-3] + pmulld(0xdd) // pmulld xmm3, xmm5 + movdqa xmm5, kHashMul1 + movdqa xmm4, xmm2 + punpckhwd xmm4, xmm7 // src[4-7] + pmulld(0xe5) // pmulld xmm4, xmm5 + movdqa xmm5, kHashMul2 + punpckhbw xmm1, xmm7 // src[8-15] + movdqa xmm2, xmm1 + punpcklwd xmm2, xmm7 // src[8-11] + pmulld(0xd5) // pmulld xmm2, xmm5 + movdqa xmm5, kHashMul3 + punpckhwd xmm1, xmm7 // src[12-15] + pmulld(0xcd) // pmulld xmm1, xmm5 + paddd xmm3, xmm4 // add 16 results + paddd xmm1, xmm2 + sub ecx, 16 + paddd xmm1, xmm3 + + pshufd xmm2, xmm1, 0x0e // upper 2 dwords + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0x01 + paddd xmm1, xmm2 + paddd xmm0, xmm1 + jg wloop + + movd eax, xmm0 // return hash + ret + } +} + +// Visual C 2012 required for AVX2. 
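
The kHash16x33 and kHashMul0..3 tables above encode the algebra that lets the SIMD paths consume 16 bytes per iteration: djb2 updates hash = hash * 33 + c per byte, so across a 16-byte block hash' = hash * 33^16 + src[0] * 33^15 + ... + src[15] * 33^0, with everything wrapping mod 2^32. A scalar sketch of that blocked form (illustrative, not part of the patch):

    typedef unsigned char uint8;
    typedef unsigned int uint32;

    // Equivalent to running HashDjb2_C over 16 bytes, but expressed the way
    // HashDjb2_SSE41/AVX2 evaluate it: one 33^16 scale plus 16 products.
    uint32 HashDjb2_Block16(const uint8* src, uint32 hash) {
      uint32 mul = 1u;
      uint32 sum = 0u;
      for (int i = 15; i >= 0; --i) {  // src[15] * 33^0 ... src[0] * 33^15.
        sum += src[i] * mul;
        mul *= 33u;
      }
      return hash * mul + sum;  // mul is now 33^16, the kHash16x33 constant.
    }
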
+#if _MSC_VER >= 1700 +__declspec(naked) __declspec(align(16)) +uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { + __asm { + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count + movd xmm0, [esp + 12] // seed + movdqa xmm6, kHash16x33 + + align 4 + wloop: + vpmovzxbd xmm3, dword ptr [eax] // src[0-3] + pmulld xmm0, xmm6 // hash *= 33 ^ 16 + vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] + pmulld xmm3, kHashMul0 + vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] + pmulld xmm4, kHashMul1 + vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] + pmulld xmm2, kHashMul2 + lea eax, [eax + 16] + pmulld xmm1, kHashMul3 + paddd xmm3, xmm4 // add 16 results + paddd xmm1, xmm2 + sub ecx, 16 + paddd xmm1, xmm3 + pshufd xmm2, xmm1, 0x0e // upper 2 dwords + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 0x01 + paddd xmm1, xmm2 + paddd xmm0, xmm1 + jg wloop + + movd eax, xmm0 // return hash + ret + } +} +#endif // _MSC_VER >= 1700 + +#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c729dab28..9d5752cbb 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -49,17 +49,17 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, ARGBToYRow_C; void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int pix) = ARGBToUV444Row_C; -#if defined(HAS_ARGBTOUV444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV444Row = ARGBToUV444Row_SSSE3; - } - } - } -#endif +#if defined(HAS_ARGBTOUV444ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { + ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3; + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { + ARGBToUV444Row = ARGBToUV444Row_SSSE3; + } + } + } +#endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { ARGBToYRow = ARGBToYRow_Any_SSSE3; diff --git a/source/row_x86.asm b/source/row_x86.asm index 56188f068..0cb326f8e 100644 --- a/source/row_x86.asm +++ b/source/row_x86.asm @@ -28,7 +28,7 @@ cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix psrlw m2, m2, 8 %endif - align 4 + ALIGN 4 .convertloop: mov%2 m0, [src_yuy2q] mov%2 m1, [src_yuy2q + mmsize] @@ -74,7 +74,7 @@ cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix psrlw m4, m4, 8 sub dst_vq, dst_uq - align 4 + ALIGN 4 .convertloop: mov%1 m0, [src_uvq] mov%1 m1, [src_uvq + mmsize] @@ -113,7 +113,7 @@ SplitUVRow a, cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix sub src_vq, src_uq - align 4 + ALIGN 4 .convertloop: mov%1 m0, [src_uq] mov%1 m1, [src_vq] diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index f35542531..10bdd2c8c 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1,995 +1,995 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include -#include - -#include "libyuv/compare.h" -#include "libyuv/convert.h" -#include "libyuv/convert_argb.h" -#include "libyuv/convert_from.h" -#include "libyuv/convert_from_argb.h" -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" -#include "../unit_test/unit_test.h" - -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else // __GNUC__ -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif - -namespace libyuv { - -#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) - -#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(src_v, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 0); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = 
abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 3); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ -} - -#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTPLANARTOP(I420, 2, 2, I420, 2, 2) -TESTPLANARTOP(I422, 2, 1, I420, 2, 2) -TESTPLANARTOP(I444, 1, 1, I420, 2, 2) -TESTPLANARTOP(I411, 4, 1, I420, 2, 2) -TESTPLANARTOP(I420, 2, 2, I422, 2, 1) -TESTPLANARTOP(I420, 2, 2, I444, 1, 1) -TESTPLANARTOP(I420, 2, 2, I411, 4, 1) -TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) -TESTPLANARTOP(I422, 2, 1, I422, 2, 1) -TESTPLANARTOP(I444, 1, 1, I444, 1, 1) - -#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(src_v, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_u + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - src_v + OFF, \ - SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_uv_opt, \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_uv_c[i * \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \ - static_cast(dst_uv_opt[i * \ - SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_uv_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_uv_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ -} - -#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2) -TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2) - -#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ - for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ - src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ - (random() & 0xff); \ - } \ - } \ - MaskCpuFlags(0); \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_uv + OFF, \ - 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \ - src_uv + OFF, \ - 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 1); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_uv) \ -} - -#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, 
SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ - FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2) -TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) - -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN)) - -#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ - const int kSizeUV = \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_u, kSizeUV + OFF); \ - align_buffer_64(src_v, kSizeUV + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - memset(dst_argb_c, 0, kStrideB * kHeight); \ - memset(dst_argb_opt, 0, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kWidth * kHeight; ++i) { \ - src_y[i + OFF] = (random() & 0xff); \ - } \ - for (int i = 0; i < kSizeUV; ++i) { \ - src_u[i + OFF] = (random() & 0xff); \ - src_v[i + OFF] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_argb_c, kStrideB, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_argb_opt, kStrideB, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_64(dst_argb32_c, kWidth * BPP_C * kHeight); \ - align_buffer_64(dst_argb32_opt, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_c, 0, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_opt, 0, kWidth * BPP_C * kHeight); \ - FMT_B##To##FMT_C(dst_argb_c, kStrideB, \ - dst_argb32_c, kWidth * BPP_C , \ - kWidth, kHeight); \ - FMT_B##To##FMT_C(dst_argb_opt, kStrideB, \ - dst_argb32_opt, kWidth * BPP_C , \ - kWidth, kHeight); \ - for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb32_c[i]) - \ - static_cast(dst_argb32_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_u) \ - free_aligned_buffer_64(src_v) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ - free_aligned_buffer_64(dst_argb32_c) \ - free_aligned_buffer_64(dst_argb32_opt) \ -} - -#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, DIFF, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ - TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ - YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C) - -// TODO(fbarchard): Make vertical alignment unnecessary on bayer. -TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 2, 2, 2, ARGB, 4) -TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 2, 2, 2, ARGB, 4) - -#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = kWidth * BPP_B; \ - align_buffer_64(src_y, kWidth * kHeight + OFF); \ - align_buffer_64(src_uv, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ - src_uv[(i * SUBSAMPLE(kWidth, SUBSAMP_X)) * 2 + j + OFF] = \ - (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - dst_argb_c, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ - src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - dst_argb_opt, kWidth * BPP_B, \ - kWidth, NEG kHeight); \ - } \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ - align_buffer_64(dst_argb32_c, kWidth * 4 * kHeight); \ - align_buffer_64(dst_argb32_opt, kWidth * 4 * kHeight); \ - memset(dst_argb32_c, 1, kWidth * 4 * kHeight); \ - memset(dst_argb32_opt, 2, kWidth * 4 * kHeight); \ - FMT_B##ToARGB(dst_argb_c, kStrideB, \ - dst_argb32_c, kWidth * 4, \ - kWidth, kHeight); \ - FMT_B##ToARGB(dst_argb_opt, kStrideB, \ - dst_argb32_opt, kWidth * 4, \ - kWidth, kHeight); \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth * 4; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_argb32_c[i * kWidth * 4 + j]) - \ - static_cast(dst_argb32_opt[i * kWidth * 4 + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_y) \ - free_aligned_buffer_64(src_uv) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ - free_aligned_buffer_64(dst_argb32_c) \ - free_aligned_buffer_64(dst_argb32_opt) \ -} - -#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ - benchmark_width_, DIFF, _Opt, +, 0) - -TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2) -TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) -TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9) - -#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? 
(W1280) : 1; \ - const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ - const int kStride = \ - (SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_64(src_argb, kStride * kHeight + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_u_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_u_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_v_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_c, 1, kWidth * kHeight); \ - memset(dst_u_c, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_c, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 2, kWidth * kHeight); \ - memset(dst_u_opt, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_v_opt, 0, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_c, kWidth, \ - dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_opt, kWidth, \ - dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_u_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_u_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - int abs_diff = \ - abs(static_cast(dst_v_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \ - static_cast(dst_v_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_u_c) \ - free_aligned_buffer_64(dst_v_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_u_opt) \ - free_aligned_buffer_64(dst_v_opt) \ - free_aligned_buffer_64(src_argb) \ -} - -#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - DIFF) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, 
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, DIFF, _Opt, +, 0) - -TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) -#ifdef __arm__ -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) -#else -TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) -#endif -TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) -TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) -TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) -// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9. -TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15) -TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) -TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4) -TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2) -TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2) -TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) -TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2) -TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) -TESTATOPLANAR(BayerBGGR, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerRGGB, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerGBRG, 1, 2, I420, 2, 2, 4) -TESTATOPLANAR(BayerGRBG, 1, 2, I420, 2, 2, 4) - -#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - W1280, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ - align_buffer_64(src_argb, kStride * kHeight + OFF); \ - align_buffer_64(dst_y_c, kWidth * kHeight); \ - align_buffer_64(dst_uv_c, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_64(dst_y_opt, kWidth * kHeight); \ - align_buffer_64(dst_uv_opt, \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ - SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - srandom(time(NULL)); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kStride; ++j) \ - src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_c, kWidth, \ - dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ - dst_y_opt, kWidth, \ - dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_y_c[i * kWidth + j]) - \ - static_cast(dst_y_opt[i * kWidth + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 4); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ - int abs_diff = \ - abs(static_cast(dst_uv_c[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ - static_cast(dst_uv_opt[i * \ - SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - } \ - EXPECT_LE(max_diff, 4); \ - free_aligned_buffer_64(dst_y_c) \ - free_aligned_buffer_64(dst_uv_c) \ - free_aligned_buffer_64(dst_y_opt) \ - free_aligned_buffer_64(dst_uv_opt) \ - free_aligned_buffer_64(src_argb) \ -} - -#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, 
SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_ - 4, _Any, +, 0) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Unaligned, +, 1) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Invert, -, 0) \ - TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ - benchmark_width_, _Opt, +, 0) - -TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2) -TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2) - -#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - W1280, DIFF, N, NEG, OFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \ - const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ - const int kHeight = benchmark_height_; \ - const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_64(src_argb, kStrideA * kHeight + OFF); \ - align_buffer_64(dst_argb_c, kStrideB * kHeight); \ - align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ - memset(dst_argb_c, 0, kStrideB * kHeight); \ - memset(dst_argb_opt, 0, kStrideB * kHeight); \ - srandom(time(NULL)); \ - for (int i = 0; i < kStrideA * kHeight; ++i) { \ - src_argb[i + OFF] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ - dst_argb_c, kStrideB, \ - kWidth, NEG kHeight); \ - MaskCpuFlags(-1); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \ - dst_argb_opt, kStrideB, \ - kWidth, NEG kHeight); \ - } \ - int max_diff = 0; \ - for (int i = 0; i < kStrideB * kHeight; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb_c[i]) - \ - static_cast(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_64(src_argb) \ - free_aligned_buffer_64(dst_argb_c) \ - free_aligned_buffer_64(dst_argb_opt) \ -} - -#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ -TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \ - srandom(time(NULL)); \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (random() & 63) + 1; \ - const int kHeight = (random() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;\ - const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;\ - align_buffer_page_end(src_argb, kStrideA * kHeightA); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ - memset(dst_argb_c, 0, kStrideB * kHeightB); \ - memset(dst_argb_opt, 0, kStrideB * kHeightB); \ - for (int i = 0; i < kStrideA * kHeightA; ++i) { \ - src_argb[i] = (random() & 0xff); \ - } \ - MaskCpuFlags(0); \ - FMT_A##To##FMT_B(src_argb, kStrideA, \ - dst_argb_c, kStrideB, \ - kWidth, kHeight); \ - MaskCpuFlags(-1); \ - FMT_A##To##FMT_B(src_argb, kStrideA, \ - dst_argb_opt, kStrideB, \ - kWidth, kHeight); \ - int max_diff = 0; \ - for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - int abs_diff = \ - abs(static_cast(dst_argb_c[i]) - \ - static_cast(dst_argb_opt[i])); \ - if (abs_diff > max_diff) { \ - max_diff = abs_diff; \ - } \ - } \ - EXPECT_LE(max_diff, DIFF); \ - free_aligned_buffer_page_end(src_argb) \ - free_aligned_buffer_page_end(dst_argb_c) \ - 
free_aligned_buffer_page_end(dst_argb_opt) \ - } \ -} - -#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_ - 4, DIFF, _Any, +, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Unaligned, +, 1) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Invert, -, 0) \ - TESTATOBI(FMT_A, BPP_A, STRIDE_A, \ - FMT_B, BPP_B, STRIDE_B, \ - benchmark_width_, DIFF, _Opt, +, 0) \ - TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \ - FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) - -TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) -TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) -TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0) -TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) -TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2) -TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2) -TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) -TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0) -TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4) -TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0) -TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0) -TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0) -TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0) - -TEST_F(libyuvTest, Test565) { - SIMD_ALIGNED(uint8 orig_pixels[256][4]); - SIMD_ALIGNED(uint8 pixels565[256][2]); - - for (int i = 0; i < 256; ++i) { - for (int j = 0; j < 4; ++j) { - orig_pixels[i][j] = i; - } - } - ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1); - uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381); - EXPECT_EQ(610919429u, checksum); -} - -#ifdef HAVE_JPEG -TEST_F(libyuvTest, ValidateJpeg) { - const int kOff = 10; - const int kMinJpeg = 64; - const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ? - benchmark_width_ * benchmark_height_ : kMinJpeg; - const int kSize = kImageSize + kOff; - align_buffer_64(orig_pixels, kSize); - - // No SOI or EOI. Expect fail. - memset(orig_pixels, 0, kSize); - - // EOI, SOI. Expect pass. - orig_pixels[0] = 0xff; - orig_pixels[1] = 0xd8; // SOI. - orig_pixels[kSize - kOff + 0] = 0xff; - orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. 
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
-  }
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-TEST_F(libyuvTest, InvalidateJpeg) {
-  const int kOff = 10;
-  const int kMinJpeg = 64;
-  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
-      benchmark_width_ * benchmark_height_ : kMinJpeg;
-  const int kSize = kImageSize + kOff;
-  align_buffer_64(orig_pixels, kSize);
-
-  // No SOI or EOI. Expect fail.
-  memset(orig_pixels, 0, kSize);
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  // SOI but no EOI. Expect fail.
-  orig_pixels[0] = 0xff;
-  orig_pixels[1] = 0xd8;  // SOI.
-  for (int times = 0; times < benchmark_iterations_; ++times) {
-    EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-  }
-  // EOI but no SOI. Expect fail.
-  orig_pixels[0] = 0;
-  orig_pixels[1] = 0;
-  orig_pixels[kSize - kOff + 0] = 0xff;
-  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
-  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
-
-  free_aligned_buffer_page_end(orig_pixels);
-}
-
-#endif
-
-}  // namespace libyuv
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "libyuv/compare.h"
+#include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"
+#include "libyuv/convert_from.h"
+#include "libyuv/convert_from_argb.h"
+#include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
+#ifdef HAVE_JPEG
+#include "libyuv/mjpeg_decoder.h"
+#endif
+#include "libyuv/planar_functions.h"
+#include "libyuv/rotate.h"
+#include "../unit_test/unit_test.h"
+
+#if defined(_MSC_VER)
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#else  // __GNUC__
+#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+#endif
+
+namespace libyuv {
+
+#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
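+
+// SUBSAMPLE is ceiling division: it sizes the chroma planes so an odd
+// dimension still gets a full final sample.  For example,
+// SUBSAMPLE(15, 2) = (15 + 1) / 2 = 8, so a 15-pixel-wide I420 row
+// carries 8 U and 8 V samples rather than truncating to 7.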
+
+#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                       FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+      src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_u + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 src_v + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_u + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   src_v + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 0); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 3); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 3); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+}
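+
+// Every worker macro in this file follows the pattern above: MaskCpuFlags(0)
+// restricts libyuv to its portable C paths to produce a reference result,
+// MaskCpuFlags(-1) then re-enables all detected SIMD paths for the timed
+// loop, and the two outputs are compared byte by byte against a small
+// per-plane tolerance (here 0 for Y and 3 for U/V).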
+
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                      FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_ - 4, _Any, +, 0) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Unaligned, +, 1) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Invert, -, 0) \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, _Opt, +, 0)
+
+TESTPLANARTOP(I420, 2, 2, I420, 2, 2)
+TESTPLANARTOP(I422, 2, 1, I420, 2, 2)
+TESTPLANARTOP(I444, 1, 1, I420, 2, 2)
+TESTPLANARTOP(I411, 4, 1, I420, 2, 2)
+TESTPLANARTOP(I420, 2, 2, I422, 2, 1)
+TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
+TESTPLANARTOP(I420, 2, 2, I411, 4, 1)
+TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2)
+TESTPLANARTOP(I422, 2, 1, I422, 2, 1)
+TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
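+
+// For example, TESTPLANARTOP(I422, 2, 1, I420, 2, 2) above expands into
+// four gtest cases: I422ToI420_Any (width reduced by 4 to hit the
+// any-width fallback), I422ToI420_Unaligned (source pointers offset by one
+// byte), I422ToI420_Invert (negative height, i.e. bottom-up images) and
+// I422ToI420_Opt (the aligned, timed case).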
+
+#define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+      src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_u + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 src_v + OFF, \
+                                 SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_u + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   src_v + OFF, \
+                                   SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_uv_opt, \
+                                   SUBSAMPLE(kWidth * 2, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_uv_c[i * \
+                               SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_uv_opt[i * \
+                               SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_uv_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_uv_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+}
+
+#define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                       FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_ - 4, _Any, +, 0) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Unaligned, +, 1) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Invert, -, 0) \
+    TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                    FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                    benchmark_width_, _Opt, +, 0)
+
+TESTPLANARTOBP(I420, 2, 2, NV12, 2, 2)
+TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
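+
+// NV12 and NV21 store chroma as a single interleaved UV (or VU) plane, so
+// the biplanar destination above is sized and strided as
+// SUBSAMPLE(kWidth * 2, SUBSAMP_X): half the luma width, two bytes per
+// chroma sample.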
+
+#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+      src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  } \
+  MaskCpuFlags(0); \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                 src_uv + OFF, \
+                                 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                 dst_y_c, kWidth, \
+                                 dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                 kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y + OFF, kWidth, \
+                                   src_uv + OFF, \
+                                   2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
+                                   dst_y_opt, kWidth, \
+                                   dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                                   kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 1); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_uv) \
+}
+
+#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_ - 4, _Any, +, 0) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Unaligned, +, 1) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Invert, -, 0) \
+    TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                     FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Opt, +, 0)
+
+TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
+TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
+
+#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN))
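+
+// ALIGNINT rounds up to a multiple of ALIGN: ALIGNINT(17, 4) =
+// (17 + 3) / 4 * 4 = 20.  It is used below to round heights and RGB
+// strides up to the alignment each packed format requires.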
+
+#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \
+TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+  const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
+  const int kSizeUV = \
+      SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_u, kSizeUV + OFF); \
+  align_buffer_64(src_v, kSizeUV + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  memset(dst_argb_c, 0, kStrideB * kHeight); \
+  memset(dst_argb_opt, 0, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kWidth * kHeight; ++i) { \
+    src_y[i + OFF] = (random() & 0xff); \
+  } \
+  for (int i = 0; i < kSizeUV; ++i) { \
+    src_u[i + OFF] = (random() & 0xff); \
+    src_v[i + OFF] = (random() & 0xff); \
+  } \
+  MaskCpuFlags(0); \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        dst_argb_c, kStrideB, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          dst_argb_opt, kStrideB, \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
+  align_buffer_64(dst_argb32_c, kWidth * BPP_C * kHeight); \
+  align_buffer_64(dst_argb32_opt, kWidth * BPP_C * kHeight); \
+  memset(dst_argb32_c, 0, kWidth * BPP_C * kHeight); \
+  memset(dst_argb32_opt, 0, kWidth * BPP_C * kHeight); \
+  FMT_B##To##FMT_C(dst_argb_c, kStrideB, \
+                   dst_argb32_c, kWidth * BPP_C, \
+                   kWidth, kHeight); \
+  FMT_B##To##FMT_C(dst_argb_opt, kStrideB, \
+                   dst_argb32_opt, kWidth * BPP_C, \
+                   kWidth, kHeight); \
+  for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
+    int abs_diff = abs(static_cast<int>(dst_argb32_c[i]) - \
+                       static_cast<int>(dst_argb32_opt[i])); \
+    if (abs_diff > max_diff) { \
+      max_diff = abs_diff; \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_u) \
+  free_aligned_buffer_64(src_v) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+  free_aligned_buffer_64(dst_argb32_c) \
+  free_aligned_buffer_64(dst_argb32_opt) \
+}
+
+#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                      YALIGN, DIFF, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \
+    TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                   YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
+
+// TODO(fbarchard): Make vertical alignment unnecessary on bayer.
+TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGBA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RAW, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGB24, 3, 3, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ARGB1555, 2, 2, 1, 9, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, ARGB4444, 2, 2, 1, 17, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I411, 4, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1, 1, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1, 1, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1, 0, ARGB, 4)
+TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 2, 2, 2, ARGB, 4)
+TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 2, 2, 2, ARGB, 4)
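+
+// The DIFF argument above scales with how lossy the packed format is once
+// expanded back to ARGB for comparison: exact 3- and 4-byte formats use 2,
+// RGB565 and ARGB1555 allow 9, and ARGB4444, which keeps only 4 bits per
+// channel, allows 17.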
+
+#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                         W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStrideB = kWidth * BPP_B; \
+  align_buffer_64(src_y, kWidth * kHeight + OFF); \
+  align_buffer_64(src_uv, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kWidth; ++j) \
+      src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
+      src_uv[(i * SUBSAMPLE(kWidth, SUBSAMP_X)) * 2 + j + OFF] = \
+          (random() & 0xff); \
+    } \
+  MaskCpuFlags(0); \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                        src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                        dst_argb_c, kWidth * BPP_B, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
+                          src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                          dst_argb_opt, kWidth * BPP_B, \
+                          kWidth, NEG kHeight); \
+  } \
+  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
+  align_buffer_64(dst_argb32_c, kWidth * 4 * kHeight); \
+  align_buffer_64(dst_argb32_opt, kWidth * 4 * kHeight); \
+  memset(dst_argb32_c, 1, kWidth * 4 * kHeight); \
+  memset(dst_argb32_opt, 2, kWidth * 4 * kHeight); \
+  FMT_B##ToARGB(dst_argb_c, kStrideB, \
+                dst_argb32_c, kWidth * 4, \
+                kWidth, kHeight); \
+  FMT_B##ToARGB(dst_argb_opt, kStrideB, \
+                dst_argb32_opt, kWidth * 4, \
+                kWidth, kHeight); \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth * 4; ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_argb32_c[i * kWidth * 4 + j]) - \
+              static_cast<int>(dst_argb32_opt[i * kWidth * 4 + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_y) \
+  free_aligned_buffer_64(src_uv) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+  free_aligned_buffer_64(dst_argb32_c) \
+  free_aligned_buffer_64(dst_argb32_opt) \
+}
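+
+// The two ARGB scratch buffers above are deliberately memset to different
+// fill values (1 and 2) before the comparison conversion, so any byte a
+// broken conversion fails to write cannot accidentally compare as equal.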
+
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+                     benchmark_width_, DIFF, _Opt, +, 0)
+
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
+TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
+
+#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                       W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
+  const int kStride = \
+      (SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
+  align_buffer_64(src_argb, kStride * kHeight + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_y_c, 1, kWidth * kHeight); \
+  memset(dst_u_c, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_v_c, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_y_opt, 2, kWidth * kHeight); \
+  memset(dst_u_opt, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  memset(dst_v_opt, 0, \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kStride; ++j) \
+      src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                        dst_y_c, kWidth, \
+                        dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                          dst_y_opt, kWidth, \
+                          dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
+              static_cast<int>(dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_u_c) \
+  free_aligned_buffer_64(dst_v_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_u_opt) \
+  free_aligned_buffer_64(dst_v_opt) \
+  free_aligned_buffer_64(src_argb) \
+}
+
+#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                      DIFF) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                   benchmark_width_, DIFF, _Opt, +, 0)
+
+TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
+#ifdef __arm__
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4)
+#else
+TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0)
+#endif
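+
+// J420 is the JPEG (full-range) variant of I420; the NEON path appears to
+// round slightly differently from the C code, hence the looser tolerance
+// of 4 on __arm__ and an exact match everywhere else.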
+TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
+TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
+// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
+TESTATOPLANAR(ARGB, 4, 1, I411, 4, 1, 4)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
+TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
+TESTATOPLANAR(BayerBGGR, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerRGGB, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerGBRG, 1, 2, I420, 2, 2, 4)
+TESTATOPLANAR(BayerGRBG, 1, 2, I420, 2, 2, 4)
+
+#define TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                         W1280, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
+  align_buffer_64(src_argb, kStride * kHeight + OFF); \
+  align_buffer_64(dst_y_c, kWidth * kHeight); \
+  align_buffer_64(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  align_buffer_64(dst_y_opt, kWidth * kHeight); \
+  align_buffer_64(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                  SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kHeight; ++i) \
+    for (int j = 0; j < kStride; ++j) \
+      src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                        dst_y_c, kWidth, \
+                        dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                        kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
+                          dst_y_opt, kWidth, \
+                          dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
+                          kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kHeight; ++i) { \
+    for (int j = 0; j < kWidth; ++j) { \
+      int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                         static_cast<int>(dst_y_opt[i * kWidth + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 4); \
+  for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
+      int abs_diff = \
+          abs(static_cast<int>(dst_uv_c[i * \
+                               SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
+              static_cast<int>(dst_uv_opt[i * \
+                               SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+  } \
+  EXPECT_LE(max_diff, 4); \
+  free_aligned_buffer_64(dst_y_c) \
+  free_aligned_buffer_64(dst_uv_c) \
+  free_aligned_buffer_64(dst_y_opt) \
+  free_aligned_buffer_64(dst_uv_opt) \
+  free_aligned_buffer_64(src_argb) \
+}
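+
+// ARGB to NV12/NV21 shares the Y tolerance of 4 with the planar tests
+// above; the interleaved UV plane is checked at the same tolerance over
+// SUBSAMPLE(kWidth, SUBSAMP_X) * 2 bytes per row.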
+
+#define TESTATOBIPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_ - 4, _Any, +, 0) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Unaligned, +, 1) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Invert, -, 0) \
+    TESTATOBIPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
+                     benchmark_width_, _Opt, +, 0)
+
+TESTATOBIPLANAR(ARGB, 4, NV12, 2, 2)
+TESTATOBIPLANAR(ARGB, 4, NV21, 2, 2)
+
+#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+                  W1280, DIFF, N, NEG, OFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+  const int kHeight = benchmark_height_; \
+  const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+  const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+  align_buffer_64(src_argb, kStrideA * kHeight + OFF); \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight); \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
+  memset(dst_argb_c, 0, kStrideB * kHeight); \
+  memset(dst_argb_opt, 0, kStrideB * kHeight); \
+  srandom(time(NULL)); \
+  for (int i = 0; i < kStrideA * kHeight; ++i) { \
+    src_argb[i + OFF] = (random() & 0xff); \
+  } \
+  MaskCpuFlags(0); \
+  FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
+                   dst_argb_c, kStrideB, \
+                   kWidth, NEG kHeight); \
+  MaskCpuFlags(-1); \
+  for (int i = 0; i < benchmark_iterations_; ++i) { \
+    FMT_A##To##FMT_B(src_argb + OFF, kStrideA, \
+                     dst_argb_opt, kStrideB, \
+                     kWidth, NEG kHeight); \
+  } \
+  int max_diff = 0; \
+  for (int i = 0; i < kStrideB * kHeight; ++i) { \
+    int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
+                       static_cast<int>(dst_argb_opt[i])); \
+    if (abs_diff > max_diff) { \
+      max_diff = abs_diff; \
+    } \
+  } \
+  EXPECT_LE(max_diff, DIFF); \
+  free_aligned_buffer_64(src_argb) \
+  free_aligned_buffer_64(dst_argb_c) \
+  free_aligned_buffer_64(dst_argb_opt) \
+}
+
+#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                       FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+TEST_F(libyuvTest, FMT_A##To##FMT_B##_Random) { \
+  srandom(time(NULL)); \
+  for (int times = 0; times < benchmark_iterations_; ++times) { \
+    const int kWidth = (random() & 63) + 1; \
+    const int kHeight = (random() & 31) + 1; \
+    const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
+    const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
+    const int kStrideA = (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
+    const int kStrideB = (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
+    align_buffer_page_end(src_argb, kStrideA * kHeightA); \
+    align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
+    align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
+    memset(dst_argb_c, 0, kStrideB * kHeightB); \
+    memset(dst_argb_opt, 0, kStrideB * kHeightB); \
+    for (int i = 0; i < kStrideA * kHeightA; ++i) { \
+      src_argb[i] = (random() & 0xff); \
+    } \
+    MaskCpuFlags(0); \
+    FMT_A##To##FMT_B(src_argb, kStrideA, \
+                     dst_argb_c, kStrideB, \
+                     kWidth, kHeight); \
+    MaskCpuFlags(-1); \
+    FMT_A##To##FMT_B(src_argb, kStrideA, \
+                     dst_argb_opt, kStrideB, \
+                     kWidth, kHeight); \
+    int max_diff = 0; \
+    for (int i = 0; i < kStrideB * kHeightB; ++i) { \
+      int abs_diff = abs(static_cast<int>(dst_argb_c[i]) - \
+                         static_cast<int>(dst_argb_opt[i])); \
+      if (abs_diff > max_diff) { \
+        max_diff = abs_diff; \
+      } \
+    } \
+    EXPECT_LE(max_diff, DIFF); \
+    free_aligned_buffer_page_end(src_argb) \
+    free_aligned_buffer_page_end(dst_argb_c) \
+    free_aligned_buffer_page_end(dst_argb_opt) \
+  } \
+}
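+
+// The _Random variant fuzzes geometry rather than data: each benchmark
+// iteration draws a fresh width in [1, 64] and height in [1, 32]
+// ((random() & 63) + 1 and (random() & 31) + 1), exercising the odd and
+// narrow sizes the fixed-size tests above never hit.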
+
+#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                 FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_ - 4, DIFF, _Any, +, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Unaligned, +, 1) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Invert, -, 0) \
+    TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, \
+              benchmark_width_, DIFF, _Opt, +, 0) \
+    TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, \
+                   FMT_B, BPP_B, STRIDE_B, HEIGHT_B, DIFF)
+
+TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerBGGR, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerRGGB, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGBRG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, BayerGRBG, 1, 2, 2, 0)
+TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
+TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
+TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
+TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, 4)
+TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, 4)
+TESTATOB(BayerBGGR, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerRGGB, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGBRG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(BayerGRBG, 1, 2, 2, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
+TESTATOB(Y, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
+
+TEST_F(libyuvTest, Test565) {
+  SIMD_ALIGNED(uint8 orig_pixels[256][4]);
+  SIMD_ALIGNED(uint8 pixels565[256][2]);
+
+  for (int i = 0; i < 256; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      orig_pixels[i][j] = i;
+    }
+  }
+  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+  uint32 checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+  EXPECT_EQ(610919429u, checksum);
+}
+
+#ifdef HAVE_JPEG
+TEST_F(libyuvTest, ValidateJpeg) {
+  const int kOff = 10;
+  const int kMinJpeg = 64;
+  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
+      benchmark_width_ * benchmark_height_ : kMinJpeg;
+  const int kSize = kImageSize + kOff;
+  align_buffer_64(orig_pixels, kSize);
+
+  // No SOI or EOI. Expect fail.
+  memset(orig_pixels, 0, kSize);
+
+  // EOI, SOI. Expect pass.
+  orig_pixels[0] = 0xff;
+  orig_pixels[1] = 0xd8;  // SOI.
+  orig_pixels[kSize - kOff + 0] = 0xff;
+  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
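+  // 0xff 0xd8 is the JPEG start-of-image (SOI) marker and 0xff 0xd9 the
+  // end-of-image (EOI) marker; ValidateJpeg accepts this buffer on the
+  // strength of those two markers alone, even though it holds no image data.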
+  for (int times = 0; times < benchmark_iterations_; ++times) {
+    EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize));
+  }
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+TEST_F(libyuvTest, InvalidateJpeg) {
+  const int kOff = 10;
+  const int kMinJpeg = 64;
+  const int kImageSize = benchmark_width_ * benchmark_height_ >= kMinJpeg ?
+      benchmark_width_ * benchmark_height_ : kMinJpeg;
+  const int kSize = kImageSize + kOff;
+  align_buffer_64(orig_pixels, kSize);
+
+  // No SOI or EOI. Expect fail.
+  memset(orig_pixels, 0, kSize);
+  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+  // SOI but no EOI. Expect fail.
+  orig_pixels[0] = 0xff;
+  orig_pixels[1] = 0xd8;  // SOI.
+  for (int times = 0; times < benchmark_iterations_; ++times) {
+    EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+  }
+  // EOI but no SOI. Expect fail.
+  orig_pixels[0] = 0;
+  orig_pixels[1] = 0;
+  orig_pixels[kSize - kOff + 0] = 0xff;
+  orig_pixels[kSize - kOff + 1] = 0xd9;  // EOI.
+  EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
+
+  free_aligned_buffer_page_end(orig_pixels);
+}
+
+#endif
+
+}  // namespace libyuv