mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
ARGBToY_NEON for faster screencasts
BUG=107 TEST=convert_test Review URL: https://webrtc-codereview.appspot.com/892004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@423 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
1ffd35fdbb
commit
0908a701e9
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 421
|
Version: 423
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -153,6 +153,7 @@ extern "C" {
|
|||||||
#define HAS_ARGBTORGB565ROW_NEON
|
#define HAS_ARGBTORGB565ROW_NEON
|
||||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||||
|
#define HAS_ARGBTOYROW_NEON
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Mips platforms
|
// The following are available on Mips platforms
|
||||||
@ -259,6 +260,8 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
|||||||
void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
|
||||||
|
// NEON row function: converts ARGB pixels from src_argb to 8-bit Y in dst_y,
// handling 8 pixels per loop iteration (see the definition for details).
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
|
||||||
|
|
||||||
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
|
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
||||||
@ -613,6 +616,8 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
|||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
// Any-width wrapper generated by YANY: runs ARGBToYRow_NEON on the first
// pix - 8 pixels and then again on the last 8 pixels of the row, so the two
// passes may overlap. Callers must pass pix > 8 (the ARGBToI420 dispatch
// only selects this variant when width > 8).
void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
|
||||||
|
|
||||||
void I422ToARGBRow_Any_NEON(const uint8* y_buf,
|
void I422ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
const uint8* v_buf,
|
const uint8* v_buf,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 421
|
#define LIBYUV_VERSION 423
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -909,6 +909,15 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#elif defined(HAS_ARGBTOYROW_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
if (width > 8) {
|
||||||
|
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||||
|
}
|
||||||
|
if (IS_ALIGNED(width, 8)) {
|
||||||
|
ARGBToYRow = ARGBToYRow_NEON;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int y = 0; y < height - 1; y += 2) {
|
for (int y = 0; y < height - 1; y += 2) {
|
||||||
|
|||||||
@ -1083,27 +1083,30 @@ RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
|
|||||||
#undef RGBANY
|
#undef RGBANY
|
||||||
|
|
||||||
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
|
// RGB/YUV to Y runs SIMD on the first width - NUM pixels, then again on the
// last NUM pixels of the row.
|
||||||
#define YANY(NAMEANY, ARGBTOY_SIMD, BPP) \
|
#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \
|
||||||
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
||||||
ARGBTOY_SIMD(src_argb, dst_y, width - 16); \
|
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
|
||||||
ARGBTOY_SIMD(src_argb + (width - 16) * BPP, dst_y + (width - 16), 16); \
|
ARGBTOY_SIMD(src_argb + (width - NUM) * BPP, dst_y + (width - NUM), NUM);\
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||||
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
|
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 16)
|
||||||
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
|
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 16)
|
||||||
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
|
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 16)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_RGBATOYROW_SSSE3
|
#ifdef HAS_RGBATOYROW_SSSE3
|
||||||
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4)
|
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 16)
|
||||||
|
#endif
|
||||||
|
#ifdef HAS_ARGBTOYROW_NEON
|
||||||
|
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 8)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_YUY2TOYROW_SSE2
|
#ifdef HAS_YUY2TOYROW_SSE2
|
||||||
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
|
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 16)
|
||||||
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 16)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_YUY2TOYROW_NEON
|
#ifdef HAS_YUY2TOYROW_NEON
|
||||||
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2)
|
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 16)
|
||||||
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2)
|
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 16)
|
||||||
#endif
|
#endif
|
||||||
#undef YANY
|
#undef YANY
|
||||||
|
|
||||||
|
|||||||
@ -995,6 +995,34 @@ void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOARGB4444ROW_NEON
|
#endif // HAS_ARGBTOARGB4444ROW_NEON
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBTOYROW_NEON
|
||||||
|
// Converts a row of ARGB pixels to 8-bit luma (Y) with NEON, 8 pixels per
// loop iteration.
//
// Per pixel: Y = ((13 * B + 65 * G + 33 * R) rounding-shifted right by 7,
// narrowed to 8 bits with unsigned saturation) + 16, where the final add
// also saturates.
//
// src_argb: source pixels, 4 bytes each. vld4.8 de-interleaves 8 pixels into
//           d0..d3; d0/d1/d2 are used as B/G/R, d3 (presumably alpha) is
//           loaded but not used.
// dst_y:    destination, one byte written per pixel.
// pix:      pixel count. The loop body executes before the count is tested,
//           so at least 8 pixels are always read and written, and a count
//           that is not a multiple of 8 is effectively rounded up to one.
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||||
|
asm volatile (
|
||||||
|
"vmov.u8 d4, #13 \n" // B * 0.1016 coefficient
|
||||||
|
"vmov.u8 d5, #65 \n" // G * 0.5078 coefficient
|
||||||
|
"vmov.u8 d6, #33 \n" // R * 0.2578 coefficient
|
||||||
|
"vmov.u8 d7, #16 \n" // Add 16 constant
|
||||||
|
".p2align 2 \n" // align the loop start to a 4-byte boundary
|
||||||
|
"1: \n" // loop top, target of the "bgt 1b" below
|
||||||
|
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
|
||||||
|
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||||
|
"vmull.u8 q8, d0, d4 \n" // B
|
||||||
|
"vmlal.u8 q8, d1, d5 \n" // G
|
||||||
|
"vmlal.u8 q8, d2, d6 \n" // R
|
||||||
|
"vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y
|
||||||
|
"vqadd.u8 d0, d7 \n" // add the 16 offset (d7) with unsigned saturation
|
||||||
|
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
||||||
|
"bgt 1b \n" // repeat while the count left by subs is > 0
|
||||||
|
: "+r"(src_argb), // %0
|
||||||
|
"+r"(dst_y), // %1
|
||||||
|
"+r"(pix) // %2
|
||||||
|
:
|
||||||
|
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBTOYROW_NEON
|
||||||
|
|
||||||
#endif // __ARM_NEON__
|
#endif // __ARM_NEON__
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user