diff --git a/README.chromium b/README.chromium
index 8e4ab96d1..24359ae46 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 131
+Version: 132
 License: BSD
 License File: LICENSE
 
diff --git a/source/row.h b/source/row.h
index bbcac5fb3..192ab5680 100644
--- a/source/row.h
+++ b/source/row.h
@@ -61,6 +61,7 @@
 // The following are available on Windows platforms
 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_ARGB4444TOARGBROW_SSE2
+#define HAS_RGB565TOARGBROW_SSE2
 #endif
 
 // The following are available on Neon platforms
@@ -213,10 +214,12 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
 void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
 void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
-// TODO(fbarchard): SSE2 565 etc
-//void RGB565ToARGBRow_SSE2(const uint8* src_rgb, uint8* dst_argb, int pix);
+// TODO(fbarchard): SSE2 555
 //void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
 #endif
+#ifdef HAS_RGB565TOARGBROW_SSE2
+void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
+#endif
 #ifdef HAS_ARGB4444TOARGBROW_SSE2
 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
 #endif
diff --git a/source/row_common.cc b/source/row_common.cc
index 91bad5d54..6a02a4b8a 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -318,7 +318,11 @@ void RAWToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
 // TODO(fbarchard): RGB565ToARGBRow_SSE2 for rgb conversion
 void RGB565ToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
   SIMD_ALIGNED(uint8 row[kMaxStride]);
+#ifdef HAS_RGB565TOARGBROW_SSE2
+  RGB565ToARGBRow_SSE2(src_argb, row, pix);
+#else
   RGB565ToARGBRow_C(src_argb, row, pix);
+#endif
   ARGBToYRow_SSSE3(row, dst_y, pix);
 }
 
@@ -361,8 +365,13 @@ void RAWToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
 void RGB565ToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
                          uint8* dst_u, uint8* dst_v, int pix) {
   SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
+#ifdef HAS_RGB565TOARGBROW_SSE2
+  RGB565ToARGBRow_SSE2(src_argb, row, pix);
+  RGB565ToARGBRow_SSE2(src_argb + src_stride_argb, row + kMaxStride, pix);
+#else
   RGB565ToARGBRow_C(src_argb, row, pix);
   RGB565ToARGBRow_C(src_argb + src_stride_argb, row + kMaxStride, pix);
+#endif
   ARGBToUVRow_SSSE3(row, kMaxStride, dst_u, dst_v, pix);
 }
 
diff --git a/source/row_win.cc b/source/row_win.cc
index 11b12ba04..e3325b959 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -229,6 +229,69 @@ __asm {
   }
 }
 
+// Converts a row of RGB565 pixels to ARGB with alpha set to 0xff.
+// Works on 8 pixels per loop iteration (16 bytes read, 32 bytes written), so
+// a pix that is not a multiple of 8 is effectively rounded up. Loads and
+// stores use movdqa, so src_rgb565 and dst_argb must be 16 byte aligned.
+// TODO(fbarchard): Port RGB565ToARGBRow_SSE2 to gcc
+__declspec(naked)
+void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
+                          int pix) {
+__asm {
+    mov        eax, [esp + 4]   // src_rgb565
+    mov        edx, [esp + 8]   // dst_argb
+    mov        ecx, [esp + 12]  // pix
+    pcmpeqb    xmm5, xmm5       // generate mask 0xff00ff00 (A of every AG word)
+    psllw      xmm5, 8
+    pcmpeqb    xmm4, xmm4       // generate mask 0xf800f800
+    psllw      xmm4, 11
+    pcmpeqb    xmm6, xmm6       // generate mask 0x001f001f
+    psrlw      xmm6, 11
+    pcmpeqb    xmm7, xmm7       // generate mask 0x00fc00fc
+    psrlw      xmm7, 10
+    psllw      xmm7, 2
+
+
+ convertloop:
+    movdqa     xmm0, [eax]      // fetch 8 pixels of bgr565
+    lea        eax, [eax + 16]
+
+    movdqa     xmm1, xmm0
+    movdqa     xmm2, xmm0
+    pand       xmm1, xmm4       // R in upper 5 bits
+    psrlw      xmm2, 13         // R 3 bits
+    psllw      xmm2, 8
+    por        xmm1, xmm2
+
+    movdqa     xmm2, xmm0
+    pand       xmm2, xmm6       // mask B 5 bits
+    movdqa     xmm3, xmm2
+    psllw      xmm2, 3
+    psrlw      xmm3, 2
+    por        xmm2, xmm3
+
+    por        xmm1, xmm2       // RB
+
+    psrlw      xmm0, 3          // G in top 6 bits of lower byte
+    pand       xmm0, xmm7       // mask G 6 bits
+    movdqa     xmm2, xmm0
+    psrlw      xmm2, 6
+    por        xmm0, xmm2
+
+    por        xmm0, xmm5       // AG
+
+    movdqa     xmm2, xmm1
+    punpcklbw  xmm1, xmm0
+    punpckhbw  xmm2, xmm0
+    movdqa     [edx], xmm1      // store 4 pixels of ARGB
+    movdqa     [edx + 16], xmm2 // store next 4 pixels of ARGB
+    lea        edx, [edx + 32]
+    sub        ecx, 8
+    ja         convertloop
+    ret
+  }
+}
+
 // TODO(fbarchard): Port ARGB4444ToARGBRow_SSE2 to gcc
 __declspec(naked)
 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,