From 081475b3c86a049c318cb8182e0b12712ff2b40a Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 12 Jan 2016 17:05:49 -0800 Subject: [PATCH] refactor ARGBToI422 using ARGBToI420 internally R=harryjin@google.com BUG=libyuv:546 Review URL: https://codereview.chromium.org/1574253004 . --- README.chromium | 2 +- include/libyuv/row.h | 15 ---- include/libyuv/version.h | 2 +- source/convert_from_argb.cc | 175 +++++++++++++++++------------------- source/row_any.cc | 4 - source/row_common.cc | 44 --------- source/row_gcc.cc | 53 ----------- source/row_neon.cc | 49 ---------- source/row_neon64.cc | 44 --------- source/row_win.cc | 58 ------------ unit_test/convert_test.cc | 2 + 11 files changed, 84 insertions(+), 364 deletions(-) diff --git a/README.chromium b/README.chromium index ddf849201..b09a90679 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1564 +Version: 1565 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 994df06e1..6a13393a6 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -93,7 +93,6 @@ extern "C" { #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2 -#define HAS_ARGBTOUV422ROW_SSSE3 #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 @@ -280,7 +279,6 @@ extern "C" { #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTOUV411ROW_NEON -#define HAS_ARGBTOUV422ROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON @@ -648,8 +646,6 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, @@ -736,8 +732,6 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, uint8* dst_u, uint8* dst_v, int width); void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, @@ -788,19 +782,10 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - void ARGBToUV444Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); void ARGBToUV411Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJ422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 218b3832b..773a5aebd 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1564 +#define LIBYUV_VERSION 1565 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 6796343c0..2a8682b7e 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -109,13 +109,16 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, uint8* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV422Row_C; + void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = ARGBToYRow_C; - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -130,34 +133,22 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, height = 1; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; } -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUV422ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -170,9 +161,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif for (y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, dst_u, dst_v, width); + ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); src_argb += src_stride_argb; dst_y += dst_stride_y; @@ -478,8 +477,8 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, uint8* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV422Row_C; + void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = ARGBToYRow_C; void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, @@ -502,34 +501,22 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, height = 1; src_stride_argb = dst_stride_yuy2 = 0; } -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUV422ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -542,7 +529,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, } } #endif - +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; @@ -567,7 +561,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, uint8* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, row_u, row_v, width); + ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToYRow(src_argb, row_y, width); I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); src_argb += src_stride_argb; @@ -585,8 +579,8 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, uint8* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUV422Row_C; + void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) = ARGBToYRow_C; void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, @@ -609,34 +603,22 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, height = 1; src_stride_argb = dst_stride_uyvy = 0; } -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUV422ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } @@ -649,7 +631,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, } } #endif - +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; + } + } +#endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; @@ -674,7 +663,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, uint8* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, row_u, row_v, width); + ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToYRow(src_argb, row_y, width); I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); src_argb += src_stride_argb; @@ -1157,21 +1146,24 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, return 0; } -// ARGB little endian (bgra in memory) to J422 +// Convert ARGB to J422. (JPeg full range I422). LIBYUV_API int ARGBToJ422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, + uint8* dst_yj, int dst_stride_yj, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { int y; - void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) = ARGBToUVJ422Row_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int width) = + void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || + !dst_yj || !dst_u || !dst_v || + width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -1179,34 +1171,19 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, } // Coalesce rows. if (src_stride_argb == width * 4 && - dst_stride_y == width && + dst_stride_yj == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; + src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; } -#if defined(HAS_ARGBTOUVJ422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJ422ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJ422Row = ARGBToUVJ422Row_NEON; - } - } -#endif - -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3; } } @@ -1227,12 +1204,20 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVJRow = ARGBToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_NEON; + } + } +#endif for (y = 0; y < height; ++y) { - ARGBToUVJ422Row(src_argb, dst_u, dst_v, width); - ARGBToYJRow(src_argb, dst_y, width); + ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; - dst_y += dst_stride_y; + dst_yj += dst_stride_yj; dst_u += dst_stride_u; dst_v += dst_stride_v; } diff --git a/source/row_any.cc b/source/row_any.cc index 5e5f435a6..d93c33d29 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -715,16 +715,12 @@ ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) #endif -#ifdef HAS_ARGBTOUV422ROW_SSSE3 -ANY12(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3, 0, 4, 1, 15) -#endif #ifdef HAS_YUY2TOUV422ROW_SSE2 ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_NEON ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) -ANY12(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, 0, 4, 1, 15) ANY12(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, 0, 4, 2, 31) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) diff --git a/source/row_common.cc b/source/row_common.cc index c820cdf1f..011501f90 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -433,28 +433,6 @@ void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ MAKEROWYJ(ARGB, 2, 1, 0, 4) #undef MAKEROWYJ -void ARGBToUVJ422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToUJ(ar, ag, ab); - dst_v[0] = RGBToVJ(ar, ag, ab); - src_argb += 8; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToUJ(ar, ag, ab); - dst_v[0] = RGBToVJ(ar, ag, ab); - } -} - void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { int x; for (x = 0; x < width; ++x) { @@ -658,28 +636,6 @@ void ARGBToUV444Row_C(const uint8* src_argb, } } -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 8; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } -} - void ARGBToUV411Row_C(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width) { int x; diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 8ac5084b6..61fbbbd3a 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1144,59 +1144,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, } #endif // HAS_ARGBTOUV444ROW_SSSE3 -#ifdef HAS_ARGBTOUV422ROW_SSSE3 -void ARGBToUV422Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %4,%%xmm3 \n" - "movdqa %5,%%xmm4 \n" - "movdqa %6,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "m"(kARGBToV), // %4 - "m"(kARGBToU), // %5 - "m"(kAddUV128) // %6 - : "memory", "cc", NACL_R14 - "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" - ); -} -#endif // HAS_ARGBTOUV422ROW_SSSE3 - void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width) { asm volatile ( "movdqa %4,%%xmm5 \n" diff --git a/source/row_neon.cc b/source/row_neon.cc index 5b4ff3b5a..13fe95cb7 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -1374,55 +1374,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ); } -// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - "1: \n" - MEMACCESS(0) - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - MEMACCESS(0) - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - - "subs %3, %3, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q0, q10 \n" // B - "vmls.s16 q8, q1, q11 \n" // G - "vmls.s16 q8, q2, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - - "vmul.s16 q9, q2, q10 \n" // R - "vmls.s16 q9, q1, q14 \n" // G - "vmls.s16 q9, q0, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - - MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - MEMACCESS(2) - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width) { diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 6fe5a1080..0b5ca05bf 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1477,50 +1477,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ -// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#ifdef HAS_ARGBTOUV422ROW_NEON -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - RGBTOUV_SETUP_REG - "1: \n" - MEMACCESS(0) - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. - - "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. - "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. - "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - - "subs %w3, %w3, #16 \n" // 16 processed per loop. - "mul v3.8h, v0.8h, v20.8h \n" // B - "mls v3.8h, v1.8h, v21.8h \n" // G - "mls v3.8h, v2.8h, v22.8h \n" // R - "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned - - "mul v4.8h, v2.8h, v20.8h \n" // R - "mls v4.8h, v1.8h, v24.8h \n" // G - "mls v4.8h, v0.8h, v23.8h \n" // B - "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned - - "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U - "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V - - MEMACCESS(1) - "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. - MEMACCESS(2) - "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. - "b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", - "v20", "v21", "v22", "v23", "v24", "v25" - ); -} -#endif // HAS_ARGBTOUV422ROW_NEON - // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. #ifdef HAS_ARGBTOUV411ROW_NEON void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, diff --git a/source/row_win.cc b/source/row_win.cc index f9c477f2a..b843998d8 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1647,64 +1647,6 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, } } -__declspec(naked) -void ARGBToUV422Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_argb - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - movdqa xmm5, xmmword ptr kAddUV128 - movdqa xmm6, xmmword ptr kARGBToV - movdqa xmm7, xmmword ptr kARGBToU - sub edi, edx // stride from u to v - - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - __declspec(naked) void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index b0b14b924..b4e8d8573 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -766,8 +766,10 @@ TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4) // arm version subsamples by summing 4 pixels then multiplying by matrix with // 4x smaller coefficients which are rounded to nearest integer. TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 4) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 4) #else TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) +TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0) #endif TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)