From 67a9e30225de48bdceb1d3765091a2f3a6b2956b Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Fri, 11 Sep 2015 11:12:30 -0700 Subject: [PATCH] neon yuv matrix function R=harryjin@google.com BUG=libyuv:488 Review URL: https://codereview.chromium.org/1337973002 . --- README.chromium | 2 +- include/libyuv/row.h | 13 +++++++++++-- include/libyuv/version.h | 2 +- source/row_common.cc | 7 +++++++ source/row_neon.cc | 26 +++++++++++++++++--------- 5 files changed, 37 insertions(+), 13 deletions(-) diff --git a/README.chromium b/README.chromium index 54c57677c..d2df87b3d 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1481 +Version: 1482 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 2b864a50b..0a64716de 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -304,7 +304,9 @@ extern "C" { #define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON // TODO(fbarchard): Implement NEON version -// #define HAS_I422TOARGBMATRIXROW_NEON +#ifndef __aarch64__ +#define HAS_I422TOARGBMATRIXROW_NEON +#endif // #define HAS_I422TOABGRMATRIXROW_NEON #define HAS_I422TOBGRAROW_NEON #define HAS_I422TORAWROW_NEON @@ -442,6 +444,13 @@ struct YuvConstants { lvec16 kYToRgb; }; +struct YuvConstantsNEON { + uvec8 kUVToRB; + uvec8 kUVToG; + vec16 kUVBiasBGR; + vec32 kYToRgb; +}; + #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else @@ -541,7 +550,7 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, uint8* dst_argb, - struct YuvConstants* YuvConstants, + struct YuvConstantsNEON* YuvConstants, int width); void I422ToABGRMatrixRow_NEON(const uint8* src_y, const uint8* src_u, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 240c2ef28..ecf6983c6 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1481 +#define LIBYUV_VERSION 1482 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index c4c8116c6..eab7e9b79 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -2276,6 +2276,7 @@ void I422ToUYVYRow_C(const uint8* src_y, extern struct YuvConstants kYuvConstants; extern struct YuvConstants kYuvJConstants; extern struct YuvConstants kYuvHConstants; +extern struct YuvConstantsNEON kYuvConstantsNEON; #define ANYYUV(NAMEANY, ANY_SIMD, YUVCONSTANTS) \ void NAMEANY(const uint8* y_buf, \ @@ -2286,6 +2287,12 @@ extern struct YuvConstants kYuvHConstants; ANY_SIMD(y_buf, u_buf, v_buf, dst_argb, &YUVCONSTANTS, width); \ } +#ifdef HAS_I422TOARGBMATRIXROW_NEON +ANYYUV(I422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvConstantsNEON) +//ANYYUV(J422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvJConstantsNEON) +//ANYYUV(H422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvHConstantsNEON) +#endif + #ifdef HAS_I422TOARGBMATRIXROW_SSSE3 ANYYUV(I422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvConstants) ANYYUV(J422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvJConstants) diff --git a/source/row_neon.cc b/source/row_neon.cc index 4298a3622..2eb7d9dea 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -150,6 +150,13 @@ extern "C" { #define BG (UG * 128 + VG * 128 - YGB) #define BR (VR * 128 - YGB) +YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = { + { 128, 128, 128, 128, 102, 102, 102, 102, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 25, 25, 25, 25, 52, 52, 52, 52, 0, 0, 0, 0, 0, 0, 0, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } +}; + static uvec8 kUVToRB = { 128, 128, 128, 128, 102, 102, 102, 102, 0, 0, 0, 0, 0, 0, 0, 0 }; static uvec8 kUVToG = { 25, 25, 25, 25, 52, 52, 52, 52, @@ -196,11 +203,12 @@ void I444ToARGBRow_NEON(const uint8* src_y, ); } -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width) { +void I422ToARGBMatrixRow_NEON(const uint8* src_y, + const uint8* src_u, + const uint8* src_v, + uint8* dst_argb, + struct YuvConstantsNEON* YuvConstants, + int width) { asm volatile ( YUV422TORGB_SETUP_REG "1: \n" @@ -216,10 +224,10 @@ void I422ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVToRB]"r"(&kUVToRB), // %5 - [kUVToG]"r"(&kUVToG), // %6 - [kUVBiasBGR]"r"(&kUVBiasBGR), - [kYToRgb]"r"(&kYToRgb) + : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5 + [kUVToG]"r"(&YuvConstants->kUVToG), // %6 + [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR), + [kYToRgb]"r"(&YuvConstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" );