diff --git a/source/row_neon.cc b/source/row_neon.cc index 9e60237ec..909df060c 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -2583,8 +2583,6 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, ); } -// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. -#ifdef HAS_ARGBMULTIPLYROW_NEON // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { @@ -2616,7 +2614,6 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, : "cc", "memory", "q0", "q1", "q2", "q3" ); } -#endif // HAS_ARGBMULTIPLYROW_NEON // Add 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 80e1515b2..406dd6a9b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -127,7 +127,6 @@ extern "C" { "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ -#ifdef HAS_I444TOARGBROW_NEON void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -157,9 +156,7 @@ void I444ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I444TOARGBROW_NEON -#ifdef HAS_I422TOARGBROW_NEON void I422ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -189,9 +186,7 @@ void I422ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TOARGBROW_NEON -#ifdef HAS_I422ALPHATOARGBROW_NEON void I422AlphaToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -224,9 +219,7 @@ void I422AlphaToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422ALPHATOARGBROW_NEON -#ifdef HAS_I411TOARGBROW_NEON void I411ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -256,9 +249,7 @@ void I411ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I411TOARGBROW_NEON -#ifdef HAS_I422TORGBAROW_NEON void I422ToRGBARow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -288,9 +279,7 @@ void I422ToRGBARow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TORGBAROW_NEON -#ifdef HAS_I422TORGB24ROW_NEON void I422ToRGB24Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -319,7 +308,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TORGB24ROW_NEON #define ARGBTORGB565 \ "shll v0.8h, v22.8b, #8 \n" /* R */ \ @@ -328,7 +316,6 @@ void I422ToRGB24Row_NEON(const uint8* src_y, "sri v0.8h, v21.8h, #5 \n" /* RG */ \ "sri v0.8h, v20.8h, #11 \n" /* RGB */ -#ifdef HAS_I422TORGB565ROW_NEON void I422ToRGB565Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -358,7 +345,6 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TORGB565ROW_NEON #define ARGBTOARGB1555 \ "shll v0.8h, v23.8b, #8 \n" /* A */ \ @@ -369,7 +355,6 @@ void I422ToRGB565Row_NEON(const uint8* src_y, "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ "sri v0.8h, v20.8h, #11 \n" /* ARGB */ -#ifdef HAS_I422TOARGB1555ROW_NEON void I422ToARGB1555Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -400,7 +385,6 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TOARGB1555ROW_NEON #define ARGBTOARGB4444 \ /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ @@ -412,7 +396,6 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ -#ifdef HAS_I422TOARGB4444ROW_NEON void I422ToARGB4444Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -444,9 +427,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I422TOARGB4444ROW_NEON -#ifdef HAS_I400TOARGBROW_NEON void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { @@ -471,9 +452,7 @@ void I400ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_I400TOARGBROW_NEON -#ifdef HAS_J400TOARGBROW_NEON void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) { @@ -495,9 +474,7 @@ void J400ToARGBRow_NEON(const uint8* src_y, : "cc", "memory", "v20", "v21", "v22", "v23" ); } -#endif // HAS_J400TOARGBROW_NEON -#ifdef HAS_NV12TOARGBROW_NEON void NV12ToARGBRow_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_argb, @@ -525,9 +502,7 @@ void NV12ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_NV12TOARGBROW_NEON -#ifdef HAS_NV12TOARGBROW_NEON void NV21ToARGBRow_NEON(const uint8* src_y, const uint8* src_vu, uint8* dst_argb, @@ -555,9 +530,7 @@ void NV21ToARGBRow_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_NV12TOARGBROW_NEON -#ifdef HAS_NV12TORGB565ROW_NEON void NV12ToRGB565Row_NEON(const uint8* src_y, const uint8* src_uv, uint8* dst_rgb565, @@ -585,9 +558,7 @@ void NV12ToRGB565Row_NEON(const uint8* src_y, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_NV12TORGB565ROW_NEON -#ifdef HAS_YUY2TOARGBROW_NEON void YUY2ToARGBRow_NEON(const uint8* src_yuy2, uint8* dst_argb, const struct YuvConstants* yuvconstants, @@ -614,9 +585,7 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_YUY2TOARGBROW_NEON -#ifdef HAS_UYVYTOARGBROW_NEON void UYVYToARGBRow_NEON(const uint8* src_uyvy, uint8* dst_argb, const struct YuvConstants* yuvconstants, @@ -643,10 +612,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } -#endif // HAS_UYVYTOARGBROW_NEON // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -#ifdef HAS_SPLITUVROW_NEON void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( @@ -667,10 +634,8 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, : "cc", "memory", "v0", "v1" // Clobber List ); } -#endif // HAS_SPLITUVROW_NEON // Reads 16 U's and V's and writes out 16 pairs of UV. -#ifdef HAS_MERGEUVROW_NEON void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, int width) { asm volatile ( @@ -692,10 +657,8 @@ void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, : "cc", "memory", "v0", "v1" // Clobber List ); } -#endif // HAS_MERGEUVROW_NEON // Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. -#ifdef HAS_COPYROW_NEON void CopyRow_NEON(const uint8* src, uint8* dst, int count) { asm volatile ( "1: \n" @@ -712,7 +675,6 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_COPYROW_NEON // SetRow writes 'count' bytes using an 8 bit value repeated. void SetRow_NEON(uint8* dst, uint8 v8, int count) { @@ -745,7 +707,6 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { ); } -#ifdef HAS_MIRRORROW_NEON void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { int64 width64 = (int64) width; asm volatile ( @@ -770,9 +731,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "v0" ); } -#endif // HAS_MIRRORROW_NEON -#ifdef HAS_MIRRORUVROW_NEON void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { int64 width64 = (int64) width; @@ -800,9 +759,7 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, : "cc", "memory", "v0", "v1" ); } -#endif // HAS_MIRRORUVROW_NEON -#ifdef HAS_ARGBMIRRORROW_NEON void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { int64 width64 = (int64) width; asm volatile ( @@ -827,9 +784,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { : "cc", "memory", "v0" ); } -#endif // HAS_ARGBMIRRORROW_NEON -#ifdef HAS_RGB24TOARGBROW_NEON void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { asm volatile ( "movi v4.8b, #255 \n" // Alpha @@ -847,9 +802,7 @@ void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List ); } -#endif // HAS_RGB24TOARGBROW_NEON -#ifdef HAS_RAWTOARGBROW_NEON void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { asm volatile ( "movi v5.8b, #255 \n" // Alpha @@ -869,7 +822,6 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List ); } -#endif // HAS_RAWTOARGBROW_NEON void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { asm volatile ( @@ -903,7 +855,6 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ "dup v2.2D, v0.D[1] \n" /* R */ -#ifdef HAS_RGB565TOARGBROW_NEON void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { asm volatile ( "movi v3.8b, #255 \n" // Alpha @@ -922,7 +873,6 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List ); } -#endif // HAS_RGB565TOARGBROW_NEON #define ARGB1555TOARGB \ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ @@ -961,7 +911,6 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ "dup v1.2D, v0.D[1] \n" /* G */ \ -#ifdef HAS_ARGB1555TOARGBROW_NEON void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, int width) { asm volatile ( @@ -981,7 +930,6 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_ARGB1555TOARGBROW_NEON #define ARGB4444TOARGB \ "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \ @@ -995,7 +943,6 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, "dup v0.2D, v2.D[1] \n" \ "dup v1.2D, v3.D[1] \n" -#ifdef HAS_ARGB4444TOARGBROW_NEON void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, int width) { asm volatile ( @@ -1014,9 +961,7 @@ void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List ); } -#endif // HAS_ARGB4444TOARGBROW_NEON -#ifdef HAS_ARGBTORGB24ROW_NEON void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { asm volatile ( "1: \n" @@ -1033,9 +978,7 @@ void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List ); } -#endif // HAS_ARGBTORGB24ROW_NEON -#ifdef HAS_ARGBTORAWROW_NEON void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { asm volatile ( "1: \n" @@ -1054,9 +997,7 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List ); } -#endif // HAS_ARGBTORAWROW_NEON -#ifdef HAS_YUY2TOYROW_NEON void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { asm volatile ( "1: \n" @@ -1073,9 +1014,7 @@ void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1" // Clobber List ); } -#endif // HAS_YUY2TOYROW_NEON -#ifdef HAS_UYVYTOYROW_NEON void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { asm volatile ( "1: \n" @@ -1092,9 +1031,7 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1" // Clobber List ); } -#endif // HAS_UYVYTOYROW_NEON -#ifdef HAS_YUY2TOUV422ROW_NEON void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( @@ -1115,9 +1052,7 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_YUY2TOUV422ROW_NEON -#ifdef HAS_UYVYTOUV422ROW_NEON void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( @@ -1138,9 +1073,7 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_UYVYTOUV422ROW_NEON -#ifdef HAS_YUY2TOUVROW_NEON void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_yuy2b = src_yuy2 + stride_yuy2; @@ -1168,9 +1101,7 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, "v5", "v6", "v7" // Clobber List ); } -#endif // HAS_YUY2TOUVROW_NEON -#ifdef HAS_UYVYTOUVROW_NEON void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_uyvyb = src_uyvy + stride_uyvy; @@ -1198,10 +1129,8 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, "v5", "v6", "v7" // Clobber List ); } -#endif // HAS_UYVYTOUVROW_NEON // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -#ifdef HAS_ARGBSHUFFLEROW_NEON void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, const uint8* shuffler, int width) { asm volatile ( @@ -1222,9 +1151,7 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, : "cc", "memory", "v0", "v1", "v2" // Clobber List ); } -#endif // HAS_ARGBSHUFFLEROW_NEON -#ifdef HAS_I422TOYUY2ROW_NEON void I422ToYUY2Row_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1251,9 +1178,7 @@ void I422ToYUY2Row_NEON(const uint8* src_y, : "cc", "memory", "v0", "v1", "v2", "v3" ); } -#endif // HAS_I422TOYUY2ROW_NEON -#ifdef HAS_I422TOUYVYROW_NEON void I422ToUYVYRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, @@ -1280,9 +1205,7 @@ void I422ToUYVYRow_NEON(const uint8* src_y, : "cc", "memory", "v0", "v1", "v2", "v3" ); } -#endif // HAS_I422TOUYVYROW_NEON -#ifdef HAS_ARGBTORGB565ROW_NEON void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { asm volatile ( "1: \n" @@ -1300,9 +1223,7 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { : "cc", "memory", "v0", "v20", "v21", "v22", "v23" ); } -#endif // HAS_ARGBTORGB565ROW_NEON -#ifdef HAS_ARGBTORGB565DITHERROW_NEON void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, const uint32 dither4, int width) { asm volatile ( @@ -1325,9 +1246,7 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23" ); } -#endif // HAS_ARGBTORGB565ROW_NEON -#ifdef HAS_ARGBTOARGB1555ROW_NEON void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, int width) { asm volatile ( @@ -1346,9 +1265,7 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, : "cc", "memory", "v0", "v20", "v21", "v22", "v23" ); } -#endif // HAS_ARGBTOARGB1555ROW_NEON -#ifdef HAS_ARGBTOARGB4444ROW_NEON void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, int width) { asm volatile ( @@ -1368,9 +1285,7 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23" ); } -#endif // HAS_ARGBTOARGB4444ROW_NEON -#ifdef HAS_ARGBTOYROW_NEON void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #13 \n" // B * 0.1016 coefficient @@ -1396,9 +1311,7 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGBTOYROW_NEON -#ifdef HAS_ARGBEXTRACTALPHAROW_NEON void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { asm volatile ( "1: \n" @@ -1415,9 +1328,7 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_ARGBEXTRACTALPHAROW_NEON -#ifdef HAS_ARGBTOYJROW_NEON void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #15 \n" // B * 0.11400 coefficient @@ -1441,10 +1352,8 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } -#endif // HAS_ARGBTOYJROW_NEON // 8x1 pixels. -#ifdef HAS_ARGBTOUV444ROW_NEON void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( @@ -1485,7 +1394,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, "v24", "v25", "v26", "v27", "v28", "v29" ); } -#endif // HAS_ARGBTOUV444ROW_NEON #define RGBTOUV_SETUP_REG \ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ @@ -1496,7 +1404,6 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. -#ifdef HAS_ARGBTOUV411ROW_NEON void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, int width) { asm volatile ( @@ -1546,7 +1453,6 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_ARGBTOUV411ROW_NEON // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ @@ -1564,7 +1470,6 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. // TODO(fbarchard): consider ptrdiff_t for all strides. -#ifdef HAS_ARGBTOUVROW_NEON void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_argb_1 = src_argb + src_stride_argb; @@ -1604,10 +1509,8 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_ARGBTOUVROW_NEON // TODO(fbarchard): Subsample match C code. -#ifdef HAS_ARGBTOUVJROW_NEON void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_argb_1 = src_argb + src_stride_argb; @@ -1651,9 +1554,7 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_ARGBTOUVJROW_NEON -#ifdef HAS_BGRATOUVROW_NEON void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_bgra_1 = src_bgra + src_stride_bgra; @@ -1692,9 +1593,7 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_BGRATOUVROW_NEON -#ifdef HAS_ABGRTOUVROW_NEON void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_abgr_1 = src_abgr + src_stride_abgr; @@ -1733,9 +1632,7 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_ABGRTOUVROW_NEON -#ifdef HAS_RGBATOUVROW_NEON void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_rgba_1 = src_rgba + src_stride_rgba; @@ -1774,9 +1671,7 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_RGBATOUVROW_NEON -#ifdef HAS_RGB24TOUVROW_NEON void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24; @@ -1815,9 +1710,7 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_RGB24TOUVROW_NEON -#ifdef HAS_RAWTOUVROW_NEON void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_raw_1 = src_raw + src_stride_raw; @@ -1856,10 +1749,8 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, "v20", "v21", "v22", "v23", "v24", "v25" ); } -#endif // HAS_RAWTOUVROW_NEON // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#ifdef HAS_RGB565TOUVROW_NEON void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565; @@ -1932,10 +1823,8 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, "v25", "v26", "v27" ); } -#endif // HAS_RGB565TOUVROW_NEON // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#ifdef HAS_ARGB1555TOUVROW_NEON void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555; @@ -2003,10 +1892,8 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, "v26", "v27", "v28" ); } -#endif // HAS_ARGB1555TOUVROW_NEON // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. -#ifdef HAS_ARGB4444TOUVROW_NEON void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, uint8* dst_u, uint8* dst_v, int width) { const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444; @@ -2075,9 +1962,7 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, ); } -#endif // HAS_ARGB4444TOUVROW_NEON -#ifdef HAS_RGB565TOYROW_NEON void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { asm volatile ( "movi v24.8b, #13 \n" // B * 0.1016 coefficient @@ -2105,9 +1990,7 @@ void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { "v24", "v25", "v26", "v27" ); } -#endif // HAS_RGB565TOYROW_NEON -#ifdef HAS_ARGB1555TOYROW_NEON void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #13 \n" // B * 0.1016 coefficient @@ -2134,9 +2017,7 @@ void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGB1555TOYROW_NEON -#ifdef HAS_ARGB4444TOYROW_NEON void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { asm volatile ( "movi v24.8b, #13 \n" // B * 0.1016 coefficient @@ -2163,9 +2044,7 @@ void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27" ); } -#endif // HAS_ARGB4444TOYROW_NEON -#ifdef HAS_BGRATOYROW_NEON void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #33 \n" // R * 0.2578 coefficient @@ -2191,9 +2070,7 @@ void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_BGRATOYROW_NEON -#ifdef HAS_ABGRTOYROW_NEON void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #33 \n" // R * 0.2578 coefficient @@ -2219,9 +2096,7 @@ void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_ABGRTOYROW_NEON -#ifdef HAS_RGBATOYROW_NEON void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #13 \n" // B * 0.1016 coefficient @@ -2247,9 +2122,7 @@ void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_RGBATOYROW_NEON -#ifdef HAS_RGB24TOYROW_NEON void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #13 \n" // B * 0.1016 coefficient @@ -2275,9 +2148,7 @@ void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_RGB24TOYROW_NEON -#ifdef HAS_RAWTOYROW_NEON void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { asm volatile ( "movi v4.8b, #33 \n" // R * 0.2578 coefficient @@ -2303,10 +2174,8 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" ); } -#endif // HAS_RAWTOYROW_NEON // Bilinear filter 16x2 -> 16x1 -#ifdef HAS_INTERPOLATEROW_NEON void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { @@ -2372,10 +2241,8 @@ void InterpolateRow_NEON(uint8* dst_ptr, : "cc", "memory", "v0", "v1", "v3", "v4", "v5" ); } -#endif // HAS_INTERPOLATEROW_NEON // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -#ifdef HAS_ARGBBLENDROW_NEON void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { asm volatile ( @@ -2444,10 +2311,8 @@ void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, "v16", "v17", "v18" ); } -#endif // HAS_ARGBBLENDROW_NEON // Attenuate 8 pixels at a time. -#ifdef HAS_ARGBATTENUATEROW_NEON void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( // Attenuate 8 pixels. @@ -2471,11 +2336,9 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } -#endif // HAS_ARGBATTENUATEROW_NEON // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; -#ifdef HAS_ARGBQUANTIZEROW_NEON void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width) { asm volatile ( @@ -2515,12 +2378,10 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" ); } -#endif // HAS_ARGBQUANTIZEROW_NEON // Shade 8 pixels at a time by specified value. // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. -#ifdef HAS_ARGBSHADEROW_NEON void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, uint32 value) { asm volatile ( @@ -2555,12 +2416,10 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, : "cc", "memory", "v0", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGBSHADEROW_NEON // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Similar to ARGBToYJ but stores ARGB. // C code is (15 * b + 75 * g + 38 * r + 64) >> 7; -#ifdef HAS_ARGBGRAYROW_NEON void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { asm volatile ( "movi v24.8b, #15 \n" // B * 0.11400 coefficient @@ -2586,14 +2445,12 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26" ); } -#endif // HAS_ARGBGRAYROW_NEON // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. // b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -#ifdef HAS_ARGBSEPIAROW_NEON void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { asm volatile ( "movi v20.8b, #17 \n" // BB coefficient @@ -2631,12 +2488,10 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30" ); } -#endif // HAS_ARGBSEPIAROW_NEON // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. -#ifdef HAS_ARGBCOLORMATRIXROW_NEON void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, const int8* matrix_argb, int width) { asm volatile ( @@ -2696,11 +2551,9 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, "v18", "v19", "v22", "v23", "v24", "v25" ); } -#endif // HAS_ARGBCOLORMATRIXROW_NEON // TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -#ifdef HAS_ARGBMULTIPLYROW_NEON void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { asm volatile ( @@ -2731,10 +2584,8 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGBMULTIPLYROW_NEON // Add 2 rows of ARGB pixels together, 8 pixels at a time. -#ifdef HAS_ARGBADDROW_NEON void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { asm volatile ( @@ -2761,10 +2612,8 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGBADDROW_NEON // Subtract 2 rows of ARGB pixels, 8 pixels at a time. -#ifdef HAS_ARGBSUBTRACTROW_NEON void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width) { asm volatile ( @@ -2791,14 +2640,12 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" ); } -#endif // HAS_ARGBSUBTRACTROW_NEON // Adds Sobel X and Sobel Y and stores Sobel into ARGB. // A = 255 // R = Sobel // G = Sobel // B = Sobel -#ifdef HAS_SOBELROW_NEON void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width) { asm volatile ( @@ -2824,10 +2671,8 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, : "cc", "memory", "v0", "v1", "v2", "v3" ); } -#endif // HAS_SOBELROW_NEON // Adds Sobel X and Sobel Y and stores Sobel into plane. -#ifdef HAS_SOBELTOPLANEROW_NEON void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_y, int width) { asm volatile ( @@ -2850,14 +2695,12 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, : "cc", "memory", "v0", "v1" ); } -#endif // HAS_SOBELTOPLANEROW_NEON // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 // R = Sobel X // G = Sobel // B = Sobel Y -#ifdef HAS_SOBELXYROW_NEON void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, uint8* dst_argb, int width) { asm volatile ( @@ -2881,13 +2724,11 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, : "cc", "memory", "v0", "v1", "v2", "v3" ); } -#endif // HAS_SOBELXYROW_NEON // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 -#ifdef HAS_SOBELXROW_NEON void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, uint8* dst_sobelx, int width) { asm volatile ( @@ -2926,13 +2767,11 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_SOBELXROW_NEON // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 -#ifdef HAS_SOBELYROW_NEON void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely, int width) { asm volatile ( @@ -2970,7 +2809,6 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } -#endif // HAS_SOBELYROW_NEON #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus