diff --git a/README.chromium b/README.chromium index 4715c7180..9c78a007f 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1743 +Version: 1744 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 620f4dbc1..d66dfcd96 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -123,6 +123,8 @@ extern "C" { #define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 +#define HAS_RGB24TOYJROW_SSSE3 +#define HAS_RAWTOYJROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 #define HAS_RGBATOUVROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 @@ -194,6 +196,8 @@ extern "C" { #define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 +#define HAS_RGB24TOYJROW_AVX2 +#define HAS_RAWTOYJROW_AVX2 #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 #define HAS_HALFFLOATROW_AVX2 @@ -973,7 +977,11 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_y, int width); void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); @@ -1187,8 +1195,12 @@ void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void 
BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); -void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 24bf8ebb8..2b52c7241 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1743 +#define LIBYUV_VERSION 1744 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert.cc b/source/convert.cc index cf6ebcdc5..d1dc9a42e 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -2380,27 +2380,38 @@ int RGB24ToJ400(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) = RGB24ToYJRow_C; -#else - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - 
RGB24ToARGBRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; -#endif if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } - -// Neon version does direct RGB24 to YJ. + // Coalesce rows: tightly packed src and dst are converted as one long row. + if (src_stride_rgb24 == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_rgb24 = dst_stride_yj = 0; + } +#if defined(HAS_RGB24TOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToYJRow = RGB24ToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToYJRow = RGB24ToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToYJRow = RGB24ToYJRow_AVX2; + } + } +#endif #if defined(HAS_RGB24TOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToYJRow = RGB24ToYJRow_Any_NEON; @@ -2408,83 +2419,28 @@ int RGB24ToJ400(const uint8_t* src_rgb24, RGB24ToYJRow = RGB24ToYJRow_NEON; } } -#elif defined(HAS_RGB24TOYJROW_MSA) +#endif +#if defined(HAS_RGB24TOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToYJRow = RGB24ToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_MSA; } } -#elif defined(HAS_RGB24TOYJROW_MMI) +#endif +#if defined(HAS_RGB24TOYJROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RGB24ToYJRow = RGB24ToYJRow_Any_MMI; if (IS_ALIGNED(width, 8)) { RGB24ToYJRow = RGB24ToYJRow_MMI; } } -// Other platforms do intermediate conversion from RGB24 to ARGB. 
-#else -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif #endif - { -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); -#endif - - for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - RGB24ToYJRow(src_rgb24, dst_yj, width); - RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToYJRow(row, dst_yj, width); - ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_yj += dst_stride_yj * 2; - } - if (height & 1) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - RGB24ToYJRow(src_rgb24, dst_yj, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToYJRow(row, dst_yj, width); -#endif - } -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - free_aligned_buffer_64(row); -#endif + for (y = 0; y < height; ++y) { + RGB24ToYJRow(src_rgb24, dst_yj, width); + src_rgb24 += src_stride_rgb24; + dst_yj += dst_stride_yj; } return 0; } @@ -2498,27 +2454,38 @@ int 
RAWToJ400(const uint8_t* src_raw, int width, int height) { int y; -#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_MMI)) void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_yj, int width) = RAWToYJRow_C; -#else - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; -#endif if (!src_raw || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } - -// Neon version does direct RAW to YJ. + // Coalesce rows: tightly packed src and dst are converted as one long row. + if (src_stride_raw == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_raw = dst_stride_yj = 0; + } +#if defined(HAS_RAWTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToYJRow = RAWToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_RAWTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RAWToYJRow = RAWToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_AVX2; + } + } +#endif #if defined(HAS_RAWTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RAWToYJRow = RAWToYJRow_Any_NEON; @@ -2526,83 +2493,28 @@ int RAWToJ400(const uint8_t* src_raw, RAWToYJRow = RAWToYJRow_NEON; } } -#elif defined(HAS_RAWTOYJROW_MSA) +#endif +#if defined(HAS_RAWTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToYJRow = RAWToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RAWToYJRow = RAWToYJRow_MSA; } } -#elif defined(HAS_RAWTOYJROW_MMI) +#endif +#if defined(HAS_RAWTOYJROW_MMI) if (TestCpuFlag(kCpuHasMMI)) { RAWToYJRow = RAWToYJRow_Any_MMI; if (IS_ALIGNED(width, 8)) { RAWToYJRow = RAWToYJRow_MMI; } } -// Other platforms do intermediate conversion from RAW to ARGB. 
-#else -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif #endif - { -#if !(defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_MMI)) - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); -#endif - - for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_MMI)) - RAWToYJRow(src_raw, dst_yj, width); - RAWToYJRow(src_raw + src_stride_raw, dst_yj + dst_stride_yj, width); -#else - RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToYJRow(row, dst_yj, width); - ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width); -#endif - src_raw += src_stride_raw * 2; - dst_yj += dst_stride_yj * 2; - } - if (height & 1) { -#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_MMI)) - RAWToYJRow(src_raw, dst_yj, width); -#else - RAWToARGBRow(src_raw, row, width); - ARGBToYJRow(row, dst_yj, width); -#endif - } -#if !(defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ - defined(HAS_RAWTOYJROW_MMI)) - free_aligned_buffer_64(row); -#endif + for (y = 0; y < height; ++y) { + RAWToYJRow(src_raw, dst_yj, width); + src_raw += src_stride_raw; + dst_yj += dst_stride_yj; } return 0; } diff --git a/source/row_any.cc b/source/row_any.cc index 8f4b06a30..0cf1a6f5e 100644 --- a/source/row_any.cc 
+++ b/source/row_any.cc @@ -695,6 +695,12 @@ ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7) #ifdef HAS_RGB24TOYROW_NEON ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) #endif +#ifdef HAS_RGB24TOYJROW_AVX2 +ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31) +#endif +#ifdef HAS_RGB24TOYJROW_SSSE3 +ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15) +#endif #ifdef HAS_RGB24TOYJROW_NEON ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7) #endif @@ -707,6 +713,12 @@ ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7) #ifdef HAS_RAWTOYROW_NEON ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) #endif +#ifdef HAS_RAWTOYJROW_AVX2 +ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31) +#endif +#ifdef HAS_RAWTOYJROW_SSSE3 +ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15) +#endif #ifdef HAS_RAWTOYJROW_NEON ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7) #endif diff --git a/source/row_common.cc b/source/row_common.cc index e6a5e80d7..97451dffa 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -3307,6 +3307,70 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y, } #endif +#ifdef HAS_RGB24TOYJROW_AVX2 +// Convert RGB24 (3 bytes per pixel) to YJ in chunks of up to MAXTWIDTH pixels. +void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { + // Row buffer for one chunk of intermediate ARGB pixels (4 bytes each). + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth); + ARGBToYJRow_AVX2(row, dst_yj, twidth); + src_rgb24 += twidth * 3; + dst_yj += twidth; + width -= twidth; + } +} +#endif // HAS_RGB24TOYJROW_AVX2 + +#ifdef HAS_RAWTOYJROW_AVX2 +// Convert RAW (3 bytes per pixel) to YJ in chunks of up to MAXTWIDTH pixels. +void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) { + // Row buffer for one chunk of intermediate ARGB pixels (4 bytes each). 
+ SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; + RAWToARGBRow_SSSE3(src_raw, row, twidth); + ARGBToYJRow_AVX2(row, dst_yj, twidth); + src_raw += twidth * 3; + dst_yj += twidth; + width -= twidth; + } +} +#endif // HAS_RAWTOYJROW_AVX2 + +#ifdef HAS_RGB24TOYJROW_SSSE3 +// Convert RGB24 (3 bytes per pixel) to YJ in chunks of up to MAXTWIDTH pixels. +void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) { + // Row buffer for one chunk of intermediate ARGB pixels (4 bytes each). + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth); + ARGBToYJRow_SSSE3(row, dst_yj, twidth); + src_rgb24 += twidth * 3; + dst_yj += twidth; + width -= twidth; + } +} +#endif // HAS_RGB24TOYJROW_SSSE3 + +#ifdef HAS_RAWTOYJROW_SSSE3 +// Convert RAW (3 bytes per pixel) to YJ in chunks of up to MAXTWIDTH pixels. +void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) { + // Row buffer for one chunk of intermediate ARGB pixels (4 bytes each). + SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); + while (width > 0) { + int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; + RAWToARGBRow_SSSE3(src_raw, row, twidth); + ARGBToYJRow_SSSE3(row, dst_yj, twidth); + src_raw += twidth * 3; + dst_yj += twidth; + width -= twidth; + } +} +#endif // HAS_RAWTOYJROW_SSSE3 + float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) { float fsum = 0.f; int i;