diff --git a/README.chromium b/README.chromium index fcab38358..4e8ed86f1 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1054 +Version: 1055 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index f0ed6c942..d5663fe8e 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -301,8 +301,8 @@ extern "C" { // #define HAS_ARGBTORGB565ROW_NEON // #define HAS_ARGBTOARGB1555ROW_NEON // #define HAS_ARGBTOARGB4444ROW_NEON -// #define HAS_ARGBTOYROW_NEON -// #define HAS_ARGBTOYJROW_NEON +#define HAS_ARGBTOYROW_NEON +#define HAS_ARGBTOYJROW_NEON // #define HAS_ARGBTOUV444ROW_NEON // #define HAS_ARGBTOUV422ROW_NEON // #define HAS_ARGBTOUV411ROW_NEON diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 86c685b5e..720779262 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1054 +#define LIBYUV_VERSION 1055 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 1bf4a41ba..996a96f50 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -711,11 +711,13 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; } } #endif diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 121a41611..6fedf477b 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -60,6 +60,13 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, } 
} } +#elif defined(HAS_ARGBTOUV444ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + ARGBToUV444Row = ARGBToUV444Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToUV444Row = ARGBToUV444Row_NEON; + } + } #endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { @@ -76,10 +83,8 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb, #elif defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON) && width >= 8) { ARGBToYRow = ARGBToYRow_Any_NEON; - ARGBToUV444Row = ARGBToUV444Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; - ARGBToUV444Row = ARGBToUV444Row_NEON; } } #endif @@ -134,6 +139,13 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, } } } +#elif defined(HAS_ARGBTOUV422ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUV422Row = ARGBToUV422Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUV422Row = ARGBToUV422Row_NEON; + } + } #endif #if defined(HAS_ARGBTOYROW_SSSE3) @@ -153,12 +165,6 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } } #endif @@ -228,11 +234,13 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 32) { - ARGBToUV411Row = ARGBToUV411Row_Any_NEON; - if (IS_ALIGNED(width, 32)) { - ARGBToUV411Row = ARGBToUV411Row_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUV411ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 32) { + ARGBToUV411Row = ARGBToUV411Row_Any_NEON; + if (IS_ALIGNED(width, 32)) { + ARGBToUV411Row = ARGBToUV411Row_NEON; } } #endif @@ -296,11 +304,13 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if 
(IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; } } #endif @@ -399,11 +409,13 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; } } #endif @@ -493,6 +505,13 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, } } } +#elif defined(HAS_ARGBTOUV422ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUV422Row = ARGBToUV422Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUV422Row = ARGBToUV422Row_NEON; + } + } #endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { @@ -510,12 +529,6 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } } #endif @@ -594,6 +607,13 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, } } } +#elif defined(HAS_ARGBTOUV422ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUV422Row = ARGBToUV422Row_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUV422Row = ARGBToUV422Row_NEON; + } + } #endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { @@ -611,12 +631,6 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUV422Row = 
ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } } #endif @@ -1022,11 +1036,13 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb, if (IS_ALIGNED(width, 8)) { ARGBToYJRow = ARGBToYJRow_NEON; } - if (width >= 16) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUVJRow = ARGBToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_NEON; } } #endif diff --git a/source/format_conversion.cc b/source/format_conversion.cc index a3daf96a9..3c1737153 100644 --- a/source/format_conversion.cc +++ b/source/format_conversion.cc @@ -332,11 +332,13 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer, if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } + } +#endif +#if defined(HAS_ARGBTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 16) { + ARGBToUVRow = ARGBToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_NEON; } } #endif diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 10aca4c7c..6e964433e 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -1580,28 +1580,28 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, #ifdef HAS_ARGBTOYROW_NEON void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant + "movi v4.8b, #13 \n" // B * 0.1016 coefficient + "movi v5.8b, #65 \n" // G * 0.5078 coefficient + "movi v6.8b, #33 \n" // R * 0.2578 coefficient + "movi v7.8b, #16 \n" // Add 16 constant 
".p2align 2 \n" "1: \n" MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. + "subs %w2, %w2, #8 \n" // 8 per loop; %w2 = 32-bit view of int pix. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" + "umull v3.8h, v0.8b, v4.8b \n" // B + "umlal v3.8h, v1.8b, v5.8b \n" // G + "umlal v3.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y + "uqadd v0.8b, v0.8b, v7.8b \n" MEMACCESS(1) - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(pix) // %2 : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } #endif // HAS_ARGBTOYROW_NEON @@ -1609,26 +1609,26 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { #ifdef HAS_ARGBTOYJROW_NEON void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { asm volatile ( - "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient - "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient - "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient + "movi v4.8b, #15 \n" // B * 0.11400 coefficient + "movi v5.8b, #75 \n" // G * 0.58700 coefficient + "movi v6.8b, #38 \n" // R * 0.29900 coefficient ".p2align 2 \n" "1: \n" MEMACCESS(0) - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. + "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. + "subs %w2, %w2, #8 \n" // 8 per loop; %w2 = 32-bit view of int pix. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y + "umull v3.8h, v0.8b, v4.8b \n" // B + "umlal v3.8h, v1.8b, v5.8b \n" // G + "umlal v3.8h, v2.8b, v6.8b \n" // R + "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y MEMACCESS(1) - "vst1.8 {d0}, [%1]! 
\n" // store 8 pixels Y. + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(pix) // %2 : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } #endif // HAS_ARGBTOYJROW_NEON