From 013080f2d2153e6c5253573fa86a63e4b5843f71 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 6 Oct 2015 22:19:14 -0700 Subject: [PATCH] Pass yuvconstants to YUV conversions for neon 64 bit SETUP provided by zhongwei.yao@linaro.org Previously the 64 bit Neon code had hard coded constants in the setup macro for YUV conversion, while 32 bit Neon code supported the yuvconstants parameter. This change accepts the constants passed to the YUV conversion row function, allowing different color spaces to be respected - namely JPEG and BT.709, as well as the existing BT.601. TBR=harryjin@google.com BUG=libyuv:472 Review URL: https://codereview.chromium.org/1384323002 . --- README.chromium | 2 +- include/libyuv/row.h | 9 ++-- include/libyuv/version.h | 2 +- source/row_common.cc | 38 +++++++------- source/row_neon64.cc | 105 +++++++++++++++++++++++++-------------- 5 files changed, 95 insertions(+), 61 deletions(-) diff --git a/README.chromium b/README.chromium index 7d9cb9fc2..837bfe389 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1501 +Version: 1502 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 4fa44e33b..8695d894d 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -428,10 +428,10 @@ typedef uint8 ulvec8[32]; // This struct is for Arm color conversion. 
struct YuvConstants { - uvec8 kUVToRB; - uvec8 kUVToG; - vec16 kUVBiasBGR; - vec32 kYToRgb; + uvec8 kUVToRB; + uvec8 kUVToG; + vec16 kUVBiasBGR; + vec32 kYToRgb; }; #else @@ -458,7 +458,6 @@ struct YuvConstants { #endif extern struct YuvConstants kYuvConstants; -extern struct YuvConstants kYvuConstants; extern struct YuvConstants kYuvJConstants; extern struct YuvConstants kYuvHConstants; diff --git a/include/libyuv/version.h b/include/libyuv/version.h index a2a70c4f7..81394a574 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1501 +#define LIBYUV_VERSION 1502 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index 0b236e72e..24cd5c3a9 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1014,21 +1014,21 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -#if defined(__arm__) || defined(__aarch64__) +#if defined(__aarch64__) +YuvConstants SIMD_ALIGNED(kYuvConstants) = { + { -UB, 0, -UB, 0, -UB, 0, -UB, 0, -VR, 0, -VR, 0, -VR, 0, -VR, 0 }, + { UG, 0, UG, 0, UG, 0, UG, 0, VG, 0, VG, 0, VG, 0, VG, 0 }, + { BB, BG, BR, 0, 0, 0, 0, 0 }, + { 0x0101 * YG, 0, 0, 0 } + }; + +#elif defined(__arm__) YuvConstants SIMD_ALIGNED(kYuvConstants) = { { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, { BB, BG, BR, 0, 0, 0, 0, 0 }, { 0x0101 * YG, 0, 0, 0 } }; - -YuvConstants SIMD_ALIGNED(kYvuConstants) = { - { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 }, - { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 }, - { BB, BG, BR, 0, 0, 0, 0, 0 }, - { 0x0101 * YG, 0, 0, 0 } -}; - #else // BT601 constants for YUV to RGB. 
YuvConstants SIMD_ALIGNED(kYuvConstants) = { @@ -1081,13 +1081,19 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) { static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* b, uint8* g, uint8* r, struct YuvConstants* yuvconstants) { -#if defined(__arm__) || defined(__aarch64__) - +#if defined(__aarch64__) + int UB = -yuvconstants->kUVToRB[0]; + int UG = yuvconstants->kUVToG[0]; + int VG = yuvconstants->kUVToG[8]; + int VR = -yuvconstants->kUVToRB[8]; + int BB = yuvconstants->kUVBiasBGR[0]; + int BG = yuvconstants->kUVBiasBGR[1]; + int BR = yuvconstants->kUVBiasBGR[2]; + int YG = yuvconstants->kYToRgb[0]; +#elif defined(__arm__) int UB = -yuvconstants->kUVToRB[0]; - int VB = 0; int UG = yuvconstants->kUVToG[0]; int VG = yuvconstants->kUVToG[4]; - int UR = 0; int VR = -yuvconstants->kUVToRB[4]; int BB = yuvconstants->kUVBiasBGR[0]; int BG = yuvconstants->kUVBiasBGR[1]; @@ -1095,10 +1101,8 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, int YG = yuvconstants->kYToRgb[0]; #else int UB = yuvconstants->kUVToB[0]; - int VB = yuvconstants->kUVToB[1]; // usually 0 int UG = yuvconstants->kUVToG[0]; int VG = yuvconstants->kUVToG[1]; - int UR = yuvconstants->kUVToR[0]; // usually 0 int VR = yuvconstants->kUVToR[1]; int BB = yuvconstants->kUVBiasB[0]; int BG = yuvconstants->kUVBiasG[0]; @@ -1106,9 +1110,9 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, int YG = yuvconstants->kYToRgb[0]; #endif uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16; - *b = Clamp((int32)(-(u * UB + v * VB) + y1 + BB) >> 6); + *b = Clamp((int32)(-(u * UB ) + y1 + BB) >> 6); *g = Clamp((int32)(-(u * UG + v * VG) + y1 + BG) >> 6); - *r = Clamp((int32)(-(u * UR + v * VR) + y1 + BR) >> 6); + *r = Clamp((int32)(-( v * VR) + y1 + BR) >> 6); } // JPEG YUV to RGB reference diff --git a/source/row_neon64.cc b/source/row_neon64.cc index e89d5acad..b13000b4b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -91,16 +91,13 @@ extern "C" { "uzp2 v3.8b, 
v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" -// TODO(fbarchard): replace movi with constants from struct. #define YUVTORGB_SETUP \ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ "ld1r {v31.4s}, [%[kYToRgb]] \n" \ - "movi v27.8h, #128 \n" \ - "movi v28.8h, #102 \n" \ - "movi v29.8h, #25 \n" \ - "movi v30.8h, #52 \n" + "ld1 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \ + "ld1 {v29.8h, v30.8h}, [%[kUVToG]] \n" #define YUVTORGB(vR, vG, vB) \ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ @@ -161,7 +158,9 @@ void I444ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" @@ -192,8 +191,10 @@ void I422ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -222,8 +223,10 @@ void I411ToARGBRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -252,8 +255,10 @@ void I422ToBGRARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_bgra), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -283,8 +288,10 @@ void I422ToABGRRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_abgr), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -313,8 +320,10 @@ void I422ToRGBARow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgba), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -342,8 +351,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb24), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + 
[kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -371,8 +382,10 @@ void I422ToRAWRow_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_raw), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -408,8 +421,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_rgb565), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -448,8 +463,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb1555), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -490,8 +507,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "+r"(src_v), // %2 "+r"(dst_argb4444), // %3 "+r"(width) // %4 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : 
[kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -516,7 +535,9 @@ void I400ToARGBRow_NEON(const uint8* src_y, : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width64) // %2 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), + : [kUVToRB]"r"(&kYuvConstants.kUVToRB), + [kUVToG]"r"(&kYuvConstants.kUVToG), + [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), [kYToRgb]"r"(&kYuvConstants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" @@ -568,8 +589,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -596,8 +619,10 @@ void NV21ToARGBRow_NEON(const uint8* src_y, "+r"(src_vu), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -624,8 +649,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "+r"(src_uv), // %1 "+r"(dst_rgb565), // %2 "+r"(width) // %3 - : 
[kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -651,8 +678,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width64) // %2 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); @@ -678,8 +707,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width64) // %2 - : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), - [kYToRgb]"r"(&kYuvConstants.kYToRgb) + : [kUVToRB]"r"(&yuvconstants->kUVToRB), + [kUVToG]"r"(&yuvconstants->kUVToG), + [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), + [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" );