diff --git a/README.chromium b/README.chromium
index 720dea5b7..8e6d8cb40 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1781
+Version: 1782
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h
index 40869ef21..c24430cc6 100644
--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@@ -693,6 +693,19 @@ int RAWToI420(const uint8_t* src_raw,
               int width,
               int height);
 
+// RGB big endian (rgb in memory) to J420.
+LIBYUV_API
+int RAWToJ420(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_u,
+              int dst_stride_u,
+              uint8_t* dst_v,
+              int dst_stride_v,
+              int width,
+              int height);
+
 // RGB16 (RGBP fourcc) little endian to I420.
 LIBYUV_API
 int RGB565ToI420(const uint8_t* src_rgb565,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 324d32f94..5aa410032 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1781
+#define LIBYUV_VERSION 1782
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
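For reviewers who want to try the new entry point declared above, here is a minimal usage sketch. It is illustrative only: the ConvertFrame wrapper and the vector-backed buffers are not part of the patch, and the strides assume the usual tightly packed I420 layout (full-size Y, half-size U and V).

#include <cstdint>
#include <vector>
#include "libyuv/convert.h"

// Hypothetical example driver, not part of the patch.
int ConvertFrame(const uint8_t* raw, int width, int height) {
  const int half_width = (width + 1) / 2;
  const int half_height = (height + 1) / 2;
  std::vector<uint8_t> y(static_cast<size_t>(width) * height);
  std::vector<uint8_t> u(static_cast<size_t>(half_width) * half_height);
  std::vector<uint8_t> v(static_cast<size_t>(half_width) * half_height);
  // RAW is 3 bytes per pixel (r, g, b in memory), so the source stride is
  // width * 3.  Returns 0 on success, -1 on bad arguments.
  return libyuv::RAWToJ420(raw, width * 3, y.data(), width, u.data(),
                           half_width, v.data(), half_width, width, height);
}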
diff --git a/source/convert.cc b/source/convert.cc
index 1bd596599..768e0f379 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -1368,6 +1368,18 @@ int ARGBToI420(const uint8_t* src_argb,
     src_argb = src_argb + (height - 1) * src_stride_argb;
     src_stride_argb = -src_stride_argb;
   }
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUVRow = ARGBToUVRow_NEON;
+      }
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@@ -1388,22 +1400,6 @@ int ARGBToI420(const uint8_t* src_argb,
     }
   }
 #endif
-#if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToYRow = ARGBToYRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToYRow = ARGBToYRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToUVRow = ARGBToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_NEON;
-    }
-  }
-#endif
 #if defined(HAS_ARGBTOYROW_MMI) && defined(HAS_ARGBTOUVROW_MMI)
   if (TestCpuFlag(kCpuHasMMI)) {
     ARGBToYRow = ARGBToYRow_Any_MMI;
@@ -1771,7 +1767,7 @@ int RGB24ToI420(const uint8_t* src_rgb24,
   }
 
 // Neon version does direct RGB24 to YUV.
-#if defined(HAS_RGB24TOYROW_NEON)
+#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
     RGB24ToYRow = RGB24ToYRow_Any_NEON;
@@ -1808,6 +1804,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
 #endif
 // Other platforms do intermediate conversion from RGB24 to ARGB.
 #else
+#if defined(HAS_RGB24TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_RGB24TOARGBROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1816,6 +1820,18 @@ int RGB24ToI420(const uint8_t* src_rgb24,
     }
   }
 #endif
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUVRow = ARGBToUVRow_NEON;
+      }
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
@@ -1960,6 +1976,14 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
   }
 #endif
 #else
+#if defined(HAS_RGB24TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RGB24ToARGBRow = RGB24ToARGBRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_RGB24TOARGBROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
@@ -1968,6 +1992,18 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
     }
   }
 #endif
+#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+    ARGBToYJRow = ARGBToYJRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYJRow = ARGBToYJRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUVJRow = ARGBToUVJRow_NEON;
+      }
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
@@ -2111,6 +2147,26 @@ int RAWToI420(const uint8_t* src_raw,
 #endif
 // Other platforms do intermediate conversion from RAW to ARGB.
 #else
+#if defined(HAS_RAWTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToARGBRow = RAWToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToARGBRow = RAWToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYROW_NEON) && defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    ARGBToYRow = ARGBToYRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYRow = ARGBToYRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUVRow = ARGBToUVRow_NEON;
+      }
+    }
+  }
+#endif
 #if defined(HAS_RAWTOARGBROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
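All of the dispatch hunks above follow the same idiom, now with NEON checked first and the Y/UV pair kept under one combined #if so the two function pointers are upgraded together: start from the C row function, switch to the _Any_ variant once the CPU flag is confirmed (it handles any width by finishing the remainder in C), and only take the fully unrolled kernel when the width meets its alignment requirement. A condensed sketch of the idiom follows; RowFn, HasNEON and the ToYRow_* names are stand-ins for illustration, not libyuv symbols.

#include <cstdint>

typedef void (*RowFn)(const uint8_t* src, uint8_t* dst, int width);

void ToYRow_C(const uint8_t* src, uint8_t* dst, int width);         // portable fallback
void ToYRow_Any_NEON(const uint8_t* src, uint8_t* dst, int width);  // any width
void ToYRow_NEON(const uint8_t* src, uint8_t* dst, int width);      // width % 8 == 0 only
bool HasNEON();  // stand-in for TestCpuFlag(kCpuHasNEON)

RowFn PickYRow(int width) {
  RowFn row = ToYRow_C;     // always-correct default
  if (HasNEON()) {
    row = ToYRow_Any_NEON;  // SIMD body plus scalar tail, safe for any width
    if ((width & 7) == 0) {
      row = ToYRow_NEON;    // fastest path, needs a multiple of 8 pixels
    }
  }
  return row;
}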
@@ -2186,6 +2242,178 @@ int RAWToI420(const uint8_t* src_raw,
   return 0;
 }
 
+// TODO(fbarchard): Use Matrix version to implement I420 and J420.
+// Convert RAW to J420.
+LIBYUV_API
+int RAWToJ420(const uint8_t* src_raw,
+              int src_stride_raw,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_u,
+              int dst_stride_u,
+              uint8_t* dst_v,
+              int dst_stride_v,
+              int width,
+              int height) {
+  int y;
+#if (defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+    defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI)
+  void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
+                      uint8_t* dst_u, uint8_t* dst_v, int width) =
+      RAWToUVJRow_C;
+  void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
+      RAWToYJRow_C;
+#else
+  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+      RAWToARGBRow_C;
+  void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+                       uint8_t* dst_u, uint8_t* dst_v, int width) =
+      ARGBToUVJRow_C;
+  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+      ARGBToYJRow_C;
+#endif
+  if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_raw = src_raw + (height - 1) * src_stride_raw;
+    src_stride_raw = -src_stride_raw;
+  }
+
+// Neon version does direct RAW to YUV.
+#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToUVJRow = RAWToUVJRow_Any_NEON;
+    RAWToYJRow = RAWToYJRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYJRow = RAWToYJRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVJRow = RAWToUVJRow_NEON;
+      }
+    }
+  }
+// MMI and MSA version does direct RAW to YUV.
+#elif (defined(HAS_RAWTOYJROW_MMI) || defined(HAS_RAWTOYJROW_MSA))
+#if defined(HAS_RAWTOYJROW_MMI) && defined(HAS_RAWTOUVJROW_MMI)
+  if (TestCpuFlag(kCpuHasMMI)) {
+    RAWToUVJRow = RAWToUVJRow_Any_MMI;
+    RAWToYJRow = RAWToYJRow_Any_MMI;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToYJRow = RAWToYJRow_MMI;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVJRow = RAWToUVJRow_MMI;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA)
+  if (TestCpuFlag(kCpuHasMSA)) {
+    RAWToUVJRow = RAWToUVJRow_Any_MSA;
+    RAWToYJRow = RAWToYJRow_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToYJRow = RAWToYJRow_MSA;
+      RAWToUVJRow = RAWToUVJRow_MSA;
+    }
+  }
+#endif
+#else
+#if defined(HAS_RAWTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RAWToARGBRow = RAWToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      RAWToARGBRow = RAWToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_NEON) && defined(HAS_ARGBTOUVJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+    ARGBToYJRow = ARGBToYJRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBToYJRow = ARGBToYJRow_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGBToUVJRow = ARGBToUVJRow_NEON;
+      }
+    }
+  }
+#endif
+#if defined(HAS_RAWTOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      RAWToARGBRow = RAWToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+      ARGBToYJRow = ARGBToYJRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVJRow = ARGBToUVJRow_AVX2;
+      ARGBToYJRow = ARGBToYJRow_AVX2;
+    }
+  }
+#endif
+#endif
+
+  {
+#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+      defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+    // Allocate 2 rows of ARGB.
+    const int kRowSize = (width * 4 + 31) & ~31;
+    align_buffer_64(row, kRowSize * 2);
+#endif
+
+    for (y = 0; y < height - 1; y += 2) {
+#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+     defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+      RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width);
+      RAWToYJRow(src_raw, dst_y, width);
+      RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
+      ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+      ARGBToYJRow(row, dst_y, width);
+      ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+      src_raw += src_stride_raw * 2;
+      dst_y += dst_stride_y * 2;
+      dst_u += dst_stride_u;
+      dst_v += dst_stride_v;
+    }
+    if (height & 1) {
+#if ((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+     defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+      RAWToUVJRow(src_raw, 0, dst_u, dst_v, width);
+      RAWToYJRow(src_raw, dst_y, width);
+#else
+      RAWToARGBRow(src_raw, row, width);
+      ARGBToUVJRow(row, 0, dst_u, dst_v, width);
+      ARGBToYJRow(row, dst_y, width);
+#endif
+    }
+#if !((defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON)) || \
+      defined(HAS_RAWTOYJROW_MSA) || defined(HAS_RAWTOYJROW_MMI))
+    free_aligned_buffer_64(row);
+#endif
+  }
+  return 0;
+}
+
 // Convert RGB565 to I420.
 LIBYUV_API
 int RGB565ToI420(const uint8_t* src_rgb565,
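One detail of the fallback path in RAWToJ420 worth calling out: the intermediate ARGB buffer holds two rows because the UV kernel consumes row pairs, and each row is rounded up to a 32-byte multiple so the second row at row + kRowSize keeps the alignment the AVX2 kernels prefer. A small standalone sketch of the rounding arithmetic, with example values in the comments:

// (width * 4 + 31) & ~31 rounds a 4-byte-per-pixel row up to the next
// multiple of 32:
//   width = 100 -> 400 bytes -> 416 (13 * 32)
//   width = 64  -> 256 bytes -> 256 (already a multiple of 32)
static int RowSize32(int width) {
  return (width * 4 + 31) & ~31;
}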
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 0bd330ec3..16b5ff92e 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -3376,14 +3376,14 @@ int AR30ToAB30(const uint8_t* src_ar30,
 // Convert AR64 to ARGB.
 LIBYUV_API
 int AR64ToARGB(const uint16_t* src_ar64,
-                int src_stride_ar64,
-                uint8_t* dst_argb,
-                int dst_stride_argb,
-                int width,
-                int height) {
+               int src_stride_ar64,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int width,
+               int height) {
   int y;
   void (*AR64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb,
-                         int width) = AR64ToARGBRow_C;
+                        int width) = AR64ToARGBRow_C;
   if (!src_ar64 || !dst_argb || width <= 0 || height == 0) {
     return -1;
   }
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 49d453972..1b4ad9b03 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -1116,8 +1116,7 @@ void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
   asm volatile(
 
       "movdqa      %3,%%xmm2                     \n"
-      "movdqa      %4,%%xmm3                     \n"
-      LABELALIGN
+      "movdqa      %4,%%xmm3                     \n" LABELALIGN
       "1:                                        \n"
       "movdqu      (%0),%%xmm0                   \n"
       "movdqa      %%xmm0,%%xmm1                 \n"
@@ -1129,11 +1128,11 @@ void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
       "lea         0x20(%1),%1                   \n"
       "sub         $0x4,%2                       \n"
       "jg          1b                            \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_ab64),  // %1
-    "+r"(width)      // %2
-  : "m"(kShuffleARGBToAB64Lo),  // %3
-    "m"(kShuffleARGBToAB64Hi)   // %4
+      : "+r"(src_argb),             // %0
+        "+r"(dst_ab64),             // %1
+        "+r"(width)                 // %2
+      : "m"(kShuffleARGBToAB64Lo),  // %3
+        "m"(kShuffleARGBToAB64Hi)   // %4
       : "memory", "cc", "xmm0", "xmm1", "xmm2");
 }
 
@@ -1166,8 +1165,7 @@ void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
                          int width) {
   asm volatile(
 
-      "movdqa      %3,%%xmm2                     \n"
-      LABELALIGN
+      "movdqa      %3,%%xmm2                     \n" LABELALIGN
       "1:                                        \n"
       "movdqu      (%0),%%xmm0                   \n"
       "movdqu      0x10(%0),%%xmm1               \n"
@@ -1220,8 +1218,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
   asm volatile(
 
       "vbroadcastf128 %3,%%ymm2                  \n"
-      "vbroadcastf128 %4,%%ymm3                  \n"
-      LABELALIGN
+      "vbroadcastf128 %4,%%ymm3                  \n" LABELALIGN
       "1:                                        \n"
       "vmovdqu     (%0),%%ymm0                   \n"
       "vpermq      $0xd8,%%ymm0,%%ymm0           \n"
@@ -1233,11 +1230,11 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
       "lea         0x40(%1),%1                   \n"
       "sub         $0x8,%2                       \n"
       "jg          1b                            \n"
-  : "+r"(src_argb),  // %0
-    "+r"(dst_ab64),  // %1
-    "+r"(width)      // %2
+      : "+r"(src_argb),             // %0
+        "+r"(dst_ab64),             // %1
+        "+r"(width)                 // %2
       : "m"(kShuffleARGBToAB64Lo),  // %3
-    "m"(kShuffleARGBToAB64Hi)   // %3
+        "m"(kShuffleARGBToAB64Hi)   // %4
       : "memory", "cc", "xmm0", "xmm1", "xmm2");
 }
 #endif
@@ -1275,8 +1272,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
                         int width) {
   asm volatile(
 
-      "vbroadcastf128 %3,%%ymm2                  \n"
-      LABELALIGN
+      "vbroadcastf128 %3,%%ymm2                  \n" LABELALIGN
       "1:                                        \n"
       "vmovdqu     (%0),%%ymm0                   \n"
       "vmovdqu     0x20(%0),%%ymm1               \n"
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 5414d1ef4..5d109a3b4 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -2191,7 +2191,7 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
       : "cc", "memory", "q0", "q1", "q2", "q3");
 }
 
-static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
+static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
 
 void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
                         uint8_t* dst_argb,
@@ -2362,9 +2362,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
       "1:                                        \n"
      "vld3.8      {d0, d1, d2}, [%0]!           \n"  // load 8 pixels of RAW.
       "subs        %2, %2, #8                    \n"  // 8 processed per loop.
-      "vmull.u8    q4, d0, d4                    \n"  // B
+      "vmull.u8    q4, d0, d4                    \n"  // R
       "vmlal.u8    q4, d1, d5                    \n"  // G
-      "vmlal.u8    q4, d2, d6                    \n"  // R
+      "vmlal.u8    q4, d2, d6                    \n"  // B
       "vqrshrn.u16 d0, q4, #8                    \n"  // 16 bit to 8 bit Y
       "vst1.8      {d0}, [%1]!                   \n"  // store 8 pixels Y.
       "bgt         1b                            \n"
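The comment swap in RAWToYJRow_NEON is worth spelling out: RAW is r, g, b byte order in memory, so vld3.8 deinterleaves R into d0 and B into d2; the old comments had the two channels reversed even though the arithmetic was correct. Assuming the usual full-range BT.601 weights in the coefficient registers (77 for R, 150 for G, 29 for B, summing to 256; they are loaded outside this hunk), a scalar equivalent of the multiply-accumulate-and-round sequence looks like the sketch below. It is for reference only and is not libyuv's actual C path.

#include <cstdint>

// 77R + 150G + 29B with round-to-nearest, matching vqrshrn.u16 #8.
static uint8_t RawPixelToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return static_cast<uint8_t>((77 * r + 150 * g + 29 * b + 128) >> 8);
}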
"bgt 1b \n" diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 9662cd3cb..3281e90f1 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1628,10 +1628,10 @@ void AR64ToARGBRow_NEON(const uint16_t* src_ar64, "subs %w2, %w2, #8 \n" // 8 processed per loop. "stp q0, q2, [%1], #32 \n" // store 8 pixels "b.gt 1b \n" - : "+r"(src_ar64), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAR64ToARGB) // %3 + : "+r"(src_ar64), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleAR64ToARGB) // %3 : "cc", "memory", "v0", "v1", "v2", "v3", "v4"); } @@ -2506,9 +2506,9 @@ void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) { "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. "prfm pldl1keep, [%0, 448] \n" "subs %w2, %w2, #8 \n" // 8 processed per loop. - "umull v0.8h, v0.8b, v4.8b \n" // B + "umull v0.8h, v0.8b, v4.8b \n" // R "umlal v0.8h, v1.8b, v5.8b \n" // G - "umlal v0.8h, v2.8b, v6.8b \n" // R + "umlal v0.8h, v2.8b, v6.8b \n" // B "uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index e83eea9ad..68cd8027c 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1140,6 +1140,7 @@ TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2) TESTATOPLANAR(I400, 1, 1, I420, 2, 2) TESTATOPLANAR(J400, 1, 1, J420, 2, 2) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2) +TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2) TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2) TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2) @@ -1226,10 +1227,11 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) const int kStrideB = \ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ align_buffer_page_end(src_argb, \ - kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \ - align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB * sizeof(TYPE_B)); \ - for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ memset(dst_argb_c, 1, kStrideB* kHeightB); \ @@ -1242,7 +1244,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ } \ - for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ @@ -1250,40 +1252,41 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2) free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ - TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (fastrand() & 63) + 1; \ - const int kHeight = (fastrand() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index e83eea9ad..68cd8027c 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -1140,6 +1140,7 @@ TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
 TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
 TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
 TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
+TESTATOPLANAR(RAW, 3, 1, J420, 2, 2)
 TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
 TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
 TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
@@ -1226,10 +1227,11 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
     const int kStrideB =                                                      \
         (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
     align_buffer_page_end(src_argb,                                           \
-                          kStrideA* kHeightA * sizeof(TYPE_A) + OFF);         \
-    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B));   \
-    align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB * sizeof(TYPE_B)); \
-    for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) {          \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
+    align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
+    align_buffer_page_end(dst_argb_opt,                                       \
+                          kStrideB* kHeightB*(int)sizeof(TYPE_B));            \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {     \
       src_argb[i + OFF] = (fastrand() & 0xff);                                \
     }                                                                         \
     memset(dst_argb_c, 1, kStrideB* kHeightB);                                \
@@ -1242,7 +1244,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
       FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA,                   \
                        (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
     }                                                                         \
-    for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) {          \
+    for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {     \
       EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                              \
     }                                                                         \
     free_aligned_buffer_page_end(src_argb);                                   \
@@ -1250,40 +1252,41 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
     free_aligned_buffer_page_end(dst_argb_c);                                 \
     free_aligned_buffer_page_end(dst_argb_opt);                               \
   }
 
-#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B,      \
-                       TYPE_B, EPP_B, STRIDE_B, HEIGHT_B)                    \
-  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) {                     \
-    for (int times = 0; times < benchmark_iterations_; ++times) {            \
-      const int kWidth = (fastrand() & 63) + 1;                              \
-      const int kHeight = (fastrand() & 31) + 1;                             \
-      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;   \
-      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;   \
-      const int kStrideA =                                                   \
-          (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;             \
-      const int kStrideB =                                                   \
-          (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;             \
-      align_buffer_page_end(src_argb, kStrideA* kHeightA * sizeof(TYPE_A));  \
-      align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
-      align_buffer_page_end(dst_argb_opt,                                    \
-                            kStrideB* kHeightB * sizeof(TYPE_B));            \
-      for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) {       \
-        src_argb[i] = 0xfe;                                                  \
-      }                                                                      \
-      memset(dst_argb_c, 123, kStrideB* kHeightB);                           \
-      memset(dst_argb_opt, 123, kStrideB* kHeightB);                         \
-      MaskCpuFlags(disable_cpu_flags_);                                      \
-      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c,     \
-                       kStrideB, kWidth, kHeight);                           \
-      MaskCpuFlags(benchmark_cpu_info_);                                     \
-      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt,   \
-                       kStrideB, kWidth, kHeight);                           \
-      for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) {       \
-        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                           \
-      }                                                                      \
-      free_aligned_buffer_page_end(src_argb);                                \
-      free_aligned_buffer_page_end(dst_argb_c);                              \
-      free_aligned_buffer_page_end(dst_argb_opt);                            \
-    }                                                                        \
+#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B,       \
+                       TYPE_B, EPP_B, STRIDE_B, HEIGHT_B)                     \
+  TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) {                      \
+    for (int times = 0; times < benchmark_iterations_; ++times) {             \
+      const int kWidth = (fastrand() & 63) + 1;                               \
+      const int kHeight = (fastrand() & 31) + 1;                              \
+      const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A;    \
+      const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B;    \
+      const int kStrideA =                                                    \
+          (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;              \
+      const int kStrideB =                                                    \
+          (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;              \
+      align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+      align_buffer_page_end(dst_argb_c,                                       \
+                            kStrideB* kHeightB*(int)sizeof(TYPE_B));          \
+      align_buffer_page_end(dst_argb_opt,                                     \
+                            kStrideB* kHeightB*(int)sizeof(TYPE_B));          \
+      for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {   \
+        src_argb[i] = 0xfe;                                                   \
+      }                                                                       \
+      memset(dst_argb_c, 123, kStrideB* kHeightB);                            \
+      memset(dst_argb_opt, 123, kStrideB* kHeightB);                          \
+      MaskCpuFlags(disable_cpu_flags_);                                       \
+      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c,      \
+                       kStrideB, kWidth, kHeight);                            \
+      MaskCpuFlags(benchmark_cpu_info_);                                      \
+      FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt,    \
+                       kStrideB, kWidth, kHeight);                            \
+      for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) {   \
+        EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                            \
+      }                                                                       \
+      free_aligned_buffer_page_end(src_argb);                                 \
+      free_aligned_buffer_page_end(dst_argb_c);                               \
+      free_aligned_buffer_page_end(dst_argb_opt);                             \
+    }                                                                         \
   }
 
 #define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B,     \
@@ -1464,10 +1467,11 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
     const int kStrideA =                                                      \
         (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
     align_buffer_page_end(src_argb,                                           \
-                          kStrideA* kHeightA * sizeof(TYPE_A) + OFF);         \
-    align_buffer_page_end(dst_argb_c, kStrideA* kHeightA * sizeof(TYPE_A));   \
-    align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA * sizeof(TYPE_A)); \
-    for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) {          \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF);      \
+    align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
+    align_buffer_page_end(dst_argb_opt,                                       \
+                          kStrideA* kHeightA*(int)sizeof(TYPE_A));            \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {     \
       src_argb[i + OFF] = (fastrand() & 0xff);                                \
     }                                                                         \
     memset(dst_argb_c, 1, kStrideA* kHeightA);                                \
@@ -1486,7 +1490,7 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
     MaskCpuFlags(benchmark_cpu_info_);                                        \
     FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
              kWidth, NEG kHeight);                                            \
-    for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) {          \
+    for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) {     \
       EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]);                          \
       EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);                              \
     }                                                                         \
diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc
index f98fbaa28..48ad75eaf 100644
--- a/unit_test/scale_argb_test.cc
+++ b/unit_test/scale_argb_test.cc
@@ -252,13 +252,13 @@ static int ARGBClipTestFilter(int src_width,
 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
 // filtering is different fixed point implementations for SSSE3, Neon and C.
 #ifdef ENABLE_SLOW_TESTS
-#define TEST_FACTOR(name, nom, denom)                                         \
+#define TEST_FACTOR(name, nom, denom)            \
   TEST_FACTOR1(, name, None, nom, denom, 0)      \
   TEST_FACTOR1(, name, Linear, nom, denom, 3)    \
   TEST_FACTOR1(, name, Bilinear, nom, denom, 3)  \
   TEST_FACTOR1(, name, Box, nom, denom, 3)
 #else
-#define TEST_FACTOR(name, nom, denom)                                         \
+#define TEST_FACTOR(name, nom, denom)                     \
   TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0)      \
   TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3)    \
   TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)  \
@@ -290,13 +292,15 @@ TEST_FACTOR(3, 1, 3)
                             benchmark_cpu_info_);                             \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, DISABLED_##name##ClipTo##width##x##height##_##filter) { \
+  TEST_F(LibYUVScaleTest,                                                     \
+         DISABLED_##name##ClipTo##width##x##height##_##filter) {              \
    int diff =                                                                 \
         ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
                            kFilter##filter, benchmark_iterations_);           \
     EXPECT_LE(diff, max_diff);                                                \
   }                                                                           \
-  TEST_F(LibYUVScaleTest, DISABLED_##name##ClipFrom##width##x##height##_##filter) { \
+  TEST_F(LibYUVScaleTest,                                                     \
+         DISABLED_##name##ClipFrom##width##x##height##_##filter) {            \
     int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_),       \
                                   Abs(benchmark_height_), kFilter##filter,    \
                                   benchmark_iterations_);                     \
 
 /// Test scale to a specified size with all 4 filters.
 #ifdef ENABLE_SLOW_TESTS
@@ -305,12 +307,12 @@ TEST_FACTOR(3, 1, 3)
-#define TEST_SCALETO(name, width, height)                                     \
+#define TEST_SCALETO(name, width, height)          \
   TEST_SCALETO1(, name, width, height, None, 0)    \
   TEST_SCALETO1(, name, width, height, Linear, 3)  \
   TEST_SCALETO1(, name, width, height, Bilinear, 3)
 #else
-#define TEST_SCALETO(name, width, height)                                     \
+#define TEST_SCALETO(name, width, height)                    \
   TEST_SCALETO1(DISABLED_, name, width, height, None, 0)     \
   TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3)   \
   TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)
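To exercise the conversion tests added here, filter on the names the TESTATOPLANAR macro generates; the DISABLED_-prefixed scale tests are still compiled and can be forced on with a gtest flag. The commands below assume the project's usual libyuv_unittest binary name and the standard gtest naming that TESTATOPLANAR(RAW, 3, 1, J420, 2, 2) produces:

  ./libyuv_unittest --gtest_filter='LibYUVConvertTest.RAWToJ420*'
  ./libyuv_unittest --gtest_also_run_disabled_tests --gtest_filter='LibYUVScaleTest.*'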