mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 09:16:48 +08:00
Port ARGB to RGB565 dithering SSE2 code to GCC.
Previously the assembly code was only available on Windows. This CL ports the SSE2 code to GCC syntax. When running a profiler on all the unittests, this function was the slowest of all functions that still ran in C code: 3.71% libyuv_unittest libyuv_unittest [.] ARGBToRGB565DitherRow_C. Was: ARGBToRGB565Dither_Opt (2894 ms). Now: ARGBToRGB565Dither_Opt (432 ms). TBR=harryjin@google.com BUG=libyuv:492 Review URL: https://codereview.chromium.org/1397673002 .
This commit is contained in:
parent
3e38762d6b
commit
cc89e3a77b
@ -91,6 +91,7 @@ extern "C" {
|
||||
#define HAS_ARGBTOARGB4444ROW_SSE2
|
||||
#define HAS_ARGBTORAWROW_SSSE3
|
||||
#define HAS_ARGBTORGB24ROW_SSSE3
|
||||
#define HAS_ARGBTORGB565DITHERROW_SSE2
|
||||
#define HAS_ARGBTORGB565ROW_SSE2
|
||||
#define HAS_ARGBTOUV422ROW_SSSE3
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
@ -102,8 +103,12 @@ extern "C" {
|
||||
#define HAS_BGRATOYROW_SSSE3
|
||||
#define HAS_COPYROW_ERMS
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_H422TOABGRROW_SSSE3
|
||||
#define HAS_H422TOARGBROW_SSSE3
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
@ -115,11 +120,11 @@ extern "C" {
|
||||
#define HAS_I422TORGBAROW_SSSE3
|
||||
#define HAS_I422TOUYVYROW_SSE2
|
||||
#define HAS_I422TOYUY2ROW_SSE2
|
||||
#define HAS_I444TOABGRROW_SSSE3
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_J400TOARGBROW_SSE2
|
||||
#define HAS_J422TOARGBROW_SSSE3
|
||||
#define HAS_J422TOABGRROW_SSSE3
|
||||
#define HAS_H422TOARGBROW_SSSE3
|
||||
#define HAS_H422TOABGRROW_SSSE3
|
||||
#define HAS_J422TOARGBROW_SSSE3
|
||||
#define HAS_MERGEUVROW_SSE2
|
||||
#define HAS_MIRRORROW_SSSE3
|
||||
#define HAS_MIRRORROW_UV_SSSE3
|
||||
@ -145,10 +150,6 @@ extern "C" {
|
||||
#define HAS_YUY2TOUV422ROW_SSE2
|
||||
#define HAS_YUY2TOUVROW_SSE2
|
||||
#define HAS_YUY2TOYROW_SSE2
|
||||
#define HAS_I444TOARGBROW_SSSE3
|
||||
#define HAS_I444TOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_SSE2
|
||||
@ -184,10 +185,10 @@ extern "C" {
|
||||
// The following are also available on x64 Visual C.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
|
||||
(!defined(__clang__) || defined(__SSSE3__))
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422ALPHATOABGRROW_SSSE3
|
||||
#define HAS_I422ALPHATOARGBROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||
@ -199,17 +200,16 @@ extern "C" {
|
||||
#define HAS_ARGBTOARGB1555ROW_AVX2
|
||||
#define HAS_ARGBTOARGB4444ROW_AVX2
|
||||
#define HAS_ARGBTORGB565DITHERROW_AVX2
|
||||
#define HAS_ARGBTORGB565DITHERROW_SSE2
|
||||
#define HAS_ARGBTORGB565ROW_AVX2
|
||||
#define HAS_I411TOARGBROW_AVX2
|
||||
#define HAS_I422TOARGB1555ROW_AVX2
|
||||
#define HAS_I422TOARGB4444ROW_AVX2
|
||||
#define HAS_I422TORGB565ROW_AVX2
|
||||
#define HAS_I444TOARGBROW_AVX2
|
||||
#define HAS_I444TOABGRROW_AVX2
|
||||
#define HAS_I444TOARGBROW_AVX2
|
||||
#define HAS_J400TOARGBROW_AVX2
|
||||
#define HAS_RGB565TOARGBROW_AVX2
|
||||
#define HAS_NV12TORGB565ROW_AVX2
|
||||
#define HAS_RGB565TOARGBROW_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms, but
|
||||
@ -226,7 +226,11 @@ extern "C" {
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_COPYROW_AVX
|
||||
#define HAS_H422TOABGRROW_AVX2
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_I400TOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOABGRROW_AVX2
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#define HAS_I422TOABGRROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
#define HAS_I422TOBGRAROW_AVX2
|
||||
@ -234,12 +238,12 @@ extern "C" {
|
||||
#define HAS_I422TORGB24ROW_AVX2
|
||||
#define HAS_I422TORGBAROW_AVX2
|
||||
#define HAS_INTERPOLATEROW_AVX2
|
||||
#define HAS_J422TOARGBROW_AVX2
|
||||
#define HAS_J422TOABGRROW_AVX2
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_H422TOABGRROW_AVX2
|
||||
#define HAS_J422TOARGBROW_AVX2
|
||||
#define HAS_MERGEUVROW_AVX2
|
||||
#define HAS_MIRRORROW_AVX2
|
||||
#define HAS_NV12TOARGBROW_AVX2
|
||||
#define HAS_NV21TOARGBROW_AVX2
|
||||
#define HAS_SPLITUVROW_AVX2
|
||||
#define HAS_UYVYTOARGBROW_AVX2
|
||||
#define HAS_UYVYTOUV422ROW_AVX2
|
||||
@ -249,10 +253,6 @@ extern "C" {
|
||||
#define HAS_YUY2TOUV422ROW_AVX2
|
||||
#define HAS_YUY2TOUVROW_AVX2
|
||||
#define HAS_YUY2TOYROW_AVX2
|
||||
#define HAS_NV12TOARGBROW_AVX2
|
||||
#define HAS_NV21TOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOARGBROW_AVX2
|
||||
#define HAS_I422ALPHATOABGRROW_AVX2
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_AVX2
|
||||
@ -273,10 +273,12 @@ extern "C" {
|
||||
#define HAS_ARGB4444TOARGBROW_NEON
|
||||
#define HAS_ARGB4444TOUVROW_NEON
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#define HAS_ARGBSETROW_NEON
|
||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565DITHERROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
@ -288,19 +290,12 @@ extern "C" {
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_J400TOARGBROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I422TOABGRROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
// TODO(fbarchard): Implement aarch64 neon version
|
||||
#ifndef __aarch64__
|
||||
#define HAS_J422TOARGBROW_NEON
|
||||
#define HAS_J422TOABGRROW_NEON
|
||||
#define HAS_H422TOARGBROW_NEON
|
||||
#define HAS_H422TOABGRROW_NEON
|
||||
#endif
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
@ -309,6 +304,7 @@ extern "C" {
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_J400TOARGBROW_NEON
|
||||
#define HAS_MERGEUVROW_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
@ -327,29 +323,28 @@ extern "C" {
|
||||
#define HAS_RGBATOUVROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_ARGBSETROW_NEON
|
||||
#define HAS_SPLITUVROW_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_UYVYTOYROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_YUY2TOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_YUY2TOYROW_NEON
|
||||
#define HAS_ARGBTORGB565DITHERROW_NEON
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_NEON
|
||||
#define HAS_ARGBATTENUATEROW_NEON
|
||||
#define HAS_ARGBBLENDROW_NEON
|
||||
#define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#define HAS_ARGBGRAYROW_NEON
|
||||
#define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_ARGBMULTIPLYROW_NEON
|
||||
#define HAS_ARGBQUANTIZEROW_NEON
|
||||
#define HAS_ARGBSEPIAROW_NEON
|
||||
#define HAS_ARGBSHADEROW_NEON
|
||||
#define HAS_ARGBSHUFFLEROW_NEON
|
||||
#define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_INTERPOLATEROW_NEON
|
||||
#define HAS_SOBELROW_NEON
|
||||
@ -357,8 +352,6 @@ extern "C" {
|
||||
#define HAS_SOBELXROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELYROW_NEON
|
||||
#define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#define HAS_ARGBSHUFFLEROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
@ -457,9 +450,9 @@ struct YuvConstants {
|
||||
#define KYTORGB 192
|
||||
#endif
|
||||
|
||||
extern struct YuvConstants kYuvConstants;
|
||||
extern struct YuvConstants kYuvJConstants;
|
||||
extern struct YuvConstants kYuvHConstants;
|
||||
extern struct YuvConstants kYuvConstants; // BT.601
|
||||
extern struct YuvConstants kYuvJConstants; // JPeg color space
|
||||
extern struct YuvConstants kYuvHConstants; // BT.709
|
||||
|
||||
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
|
||||
#define OMITFP
|
||||
|
||||
@ -526,6 +526,52 @@ void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) {
|
||||
);
|
||||
}
|
||||
|
||||
// Converts a row of 32-bit pixels to 16-bit 5:6:5 pixels, adding a dither
// value to every source byte first (SSE2, GCC inline-asm syntax).
//
//   src     - source pixels, 4 bytes each; read with unaligned 16-byte loads.
//   dst     - destination, 2 bytes per pixel; written 8 bytes at a time.
//   dither4 - four dither bytes; byte i is added (unsigned, saturating) to all
//             four bytes of pixel i within each group of 4 pixels.
//   pix     - pixel count; decremented by 4 per iteration.
//
// Per pixel, the top 5 bits of byte 0 land in bits 0..4, the top 6 bits of
// byte 1 in bits 5..10 and the top 5 bits of byte 2 in bits 11..15; byte 3 is
// discarded. (Going by the function name these are presumably B, G, R and A.)
//
// NOTE(review): the loop body runs before pix is tested, so at least 4 pixels
// are always processed and pix is effectively rounded up to a multiple of 4.
// Presumably the caller handles widths that are not a multiple of 4 - confirm.
void ARGBToRGB565DitherRow_SSE2(const uint8* src, uint8* dst,
|
||||
const uint32 dither4, int pix) {
|
||||
asm volatile (
|
||||
"movd %3,%%xmm6 \n"  // xmm6 = dither4 in the low 32 bits, rest zero
|
||||
"punpcklbw %%xmm6,%%xmm6 \n"  // double each dither byte: d0 d0 d1 d1 d2 d2 d3 d3
|
||||
"movdqa %%xmm6,%%xmm7 \n"  // copy of the doubled dither bytes
|
||||
"punpcklwd %%xmm6,%%xmm6 \n"  // xmm6 = each dither byte repeated 4x, one dword per pixel
|
||||
"punpckhwd %%xmm7,%%xmm7 \n"  // NOTE(review): xmm7 is not referenced by the loop below
|
||||
"pcmpeqb %%xmm3,%%xmm3 \n"  // xmm3 = all ones
|
||||
"psrld $0x1b,%%xmm3 \n"  // xmm3 = 0x0000001f per dword: mask for bits 0..4
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"  // xmm4 = all ones
|
||||
"psrld $0x1a,%%xmm4 \n"  // xmm4 = 0x0000003f per dword
|
||||
"pslld $0x5,%%xmm4 \n"  // xmm4 = 0x000007e0 per dword: mask for bits 5..10
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"  // xmm5 = all ones
|
||||
"pslld $0xb,%%xmm5 \n"  // xmm5 = 0xfffff800 per dword: mask for bits 11 and up
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"  // load 4 source pixels (16 bytes, unaligned)
|
||||
"paddusb %%xmm6,%%xmm0 \n"  // add dither to every byte, saturating at 255
|
||||
"movdqa %%xmm0,%%xmm1 \n"  // xmm1: copy used for the low 5-bit field
|
||||
"movdqa %%xmm0,%%xmm2 \n"  // xmm2: copy used for the middle 6-bit field
|
||||
"pslld $0x8,%%xmm0 \n"  // move byte 2 to the top of each dword (drops byte 3)
|
||||
"psrld $0x3,%%xmm1 \n"  // top 5 bits of byte 0 -> bits 0..4
|
||||
"psrld $0x5,%%xmm2 \n"  // top 6 bits of byte 1 -> bits 5..10
|
||||
"psrad $0x10,%%xmm0 \n"  // byte 2 -> bits 8..15, sign-extended through bit 31
|
||||
"pand %%xmm3,%%xmm1 \n"  // keep bits 0..4
|
||||
"pand %%xmm4,%%xmm2 \n"  // keep bits 5..10
|
||||
"pand %%xmm5,%%xmm0 \n"  // keep bits 11 and up (top field plus sign bits)
|
||||
"por %%xmm2,%%xmm1 \n"  // merge low and middle fields
|
||||
"por %%xmm1,%%xmm0 \n"  // merge in the top field
|
||||
"packssdw %%xmm0,%%xmm0 \n"  // dwords -> words; the sign extension keeps each value within int16, so nothing saturates
|
||||
"lea 0x10(%0),%0 \n"  // src += 16 bytes (4 source pixels)
|
||||
"movq %%xmm0,(%1) \n"  // store 4 16-bit pixels (8 bytes)
|
||||
"lea 0x8(%1),%1 \n"  // dst += 8 bytes
|
||||
"sub $0x4,%2 \n"  // pix -= 4
|
||||
"jg 1b \n"  // loop while pix > 0
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(pix) // %2
|
||||
: "m"(dither4) // %3
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
|
||||
@ -833,7 +833,6 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
}
|
||||
}
|
||||
|
||||
// 4 pixels
|
||||
__declspec(naked)
|
||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
@ -871,7 +870,6 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels
|
||||
__declspec(naked)
|
||||
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int pix) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user