diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 7ea8bf088..f238fd0f9 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -87,6 +87,8 @@ extern "C" { #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I422TOUYVYROW_SSE2 #define HAS_MERGEUV_SSE2 +#define HAS_I422TOARGB4444ROW_SSSE3 +#define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TORGB565ROW_SSSE3 // Effects @@ -150,6 +152,8 @@ extern "C" { #define HAS_I422TOBGRAROW_NEON #define HAS_I422TORAWROW_NEON #define HAS_I422TORGB24ROW_NEON +#define HAS_I422TOARGB4444ROW_NEON +#define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON #define HAS_MIRRORROW_NEON @@ -251,6 +255,16 @@ void I422ToRAWRow_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width); +void I422ToARGB4444Row_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +void I422ToARGB1555Row_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); void I422ToRGB565Row_NEON(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -481,6 +495,16 @@ void I422ToRAWRow_C(const uint8* y_buf, const uint8* v_buf, uint8* raw_buf, int width); +void I422ToARGB4444Row_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_rgb565, + int width); +void I422ToARGB1555Row_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_rgb565, + int width); void I422ToRGB565Row_C(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -536,6 +560,16 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgba_buf, int width); +void I422ToARGB4444Row_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +void I422ToARGB1555Row_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); void I422ToRGB565Row_SSSE3(const uint8* y_buf, const uint8* u_buf, 
const uint8* v_buf, @@ -593,11 +627,6 @@ void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgba_buf, int width); -void I422ToRGB565Row_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); void I444ToARGBRow_Any_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -636,6 +665,16 @@ void I422ToRGBARow_Any_SSSE3(const uint8* y_buf, const uint8* v_buf, uint8* rgba_buf, int width); +void I422ToARGB4444Row_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, + int width); +void I422ToARGB1555Row_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, + int width); void I422ToRGB565Row_Any_SSSE3(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -723,6 +762,16 @@ void I422ToRAWRow_Any_NEON(const uint8* y_buf, const uint8* v_buf, uint8* rgb_buf, int width); +void I422ToARGB4444Row_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +void I422ToARGB1555Row_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); void I422ToRGB565Row_Any_NEON(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, diff --git a/source/convert_from.cc b/source/convert_from.cc index 4b12a4cc1..7cb61a0c1 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -916,6 +916,107 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, return 0; } +// Convert I420 to ARGB1555. +LIBYUV_API +int I420ToARGB1555(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height) { + if (!src_y || !src_u || !src_v || !dst_argb1555 || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555; + dst_stride_argb1555 = -dst_stride_argb1555; + } + void (*I422ToARGB1555Row)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I422ToARGB1555Row_C; +#if defined(HAS_I422TOARGB1555ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { + I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToARGB1555Row = I422ToARGB1555Row_SSSE3; + } + } +#elif defined(HAS_I422TOARGB1555ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToARGB1555Row = I422ToARGB1555Row_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); + dst_argb1555 += dst_stride_argb1555; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + + +// Convert I420 to ARGB4444. +LIBYUV_API +int I420ToARGB4444(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height) { + if (!src_y || !src_u || !src_v || !dst_argb4444 || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444; + dst_stride_argb4444 = -dst_stride_argb4444; + } + void (*I422ToARGB4444Row)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) = I422ToARGB4444Row_C; +#if defined(HAS_I422TOARGB4444ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { + I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3; + if (IS_ALIGNED(width, 8)) { + I422ToARGB4444Row = I422ToARGB4444Row_SSSE3; + } + } +#elif defined(HAS_I422TOARGB4444ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { + I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + I422ToARGB4444Row = I422ToARGB4444Row_NEON; + } + } +#endif + + for (int y = 0; y < height; ++y) { + I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); + dst_argb4444 += dst_stride_argb4444; + src_y += src_stride_y; + if (y & 1) { + src_u += src_stride_u; + src_v += src_stride_v; + } + } + return 0; +} + // Convert I420 to RGB565. LIBYUV_API int I420ToRGB565(const uint8* src_y, int src_stride_y, @@ -942,10 +1043,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { - I422ToRGB565Row = I422ToRGB565Row_Unaligned_SSSE3; - if (IS_ALIGNED(dst_rgb565, 16) && IS_ALIGNED(dst_stride_rgb565, 16)) { - I422ToRGB565Row = I422ToRGB565Row_SSSE3; - } + I422ToRGB565Row = I422ToRGB565Row_SSSE3; } } #elif defined(HAS_I422TORGB565ROW_NEON) @@ -969,154 +1067,6 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, return 0; } -// Convert I420 to ARGB1555. 
-LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#elif defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } -#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToARGB1555Row_C; -#if defined(HAS_ARGBTOARGB1555ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width * 2 <= kMaxStride) { - ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; - } - if (IS_ALIGNED(width, 4)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToARGB1555Row(row, dst_argb, width); - dst_argb += dst_stride_argb; - 
src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ARGB4444. -LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#elif defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } -#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - - SIMD_ALIGNED(uint8 row[kMaxStride]); - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToARGB4444Row_C; -#if defined(HAS_ARGBTOARGB4444ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - if (width * 2 <= kMaxStride) { - ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; - } - if (IS_ALIGNED(width, 4)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; - } - } -#endif - - for (int y = 0; y < 
height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToARGB4444Row(row, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - // Convert I420 to specified format LIBYUV_API int ConvertFromI420(const uint8* y, int y_stride, diff --git a/source/row_any.cc b/source/row_any.cc index e5f329fdf..fe932037a 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -47,8 +47,13 @@ YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1, 4, 7) YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, 1, 4, 7) -YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_Unaligned_SSSE3, - I422ToRGB565Row_C, 1, 2, 7) +// I422ToRGB565Row_SSSE3 is unaligned. +YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, + 1, 2, 7) +YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C, + 1, 2, 7) +YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C, + 1, 2, 7) // I422ToRGB24Row_SSSE3 is unaligned. 
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7) YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7) @@ -62,6 +67,10 @@ YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7) YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7) YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7) YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7) +YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C, + 1, 2, 7) +YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C, + 1, 2, 7) YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7) YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15) YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15) diff --git a/source/row_common.cc b/source/row_common.cc index e421df6df..f333efc83 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -542,6 +542,80 @@ void I422ToRAWRow_C(const uint8* y_buf, } } +void I422ToARGB4444Row_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb4444, + int width) { + uint8 b0; + uint8 g0; + uint8 r0; + uint8 b1; + uint8 g1; + uint8 r1; + for (int x = 0; x < width - 1; x += 2) { + YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + b0 = b0 >> 4; + g0 = g0 >> 4; + r0 = r0 >> 4; + b1 = b1 >> 4; + g1 = g1 >> 4; + r1 = r1 >> 4; + *reinterpret_cast<uint32*>(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | + (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; + y_buf += 2; + u_buf += 1; + v_buf += 1; + dst_argb4444 += 4; // Advance 2 pixels. 
+ } + if (width & 1) { + YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + b0 = b0 >> 4; + g0 = g0 >> 4; + r0 = r0 >> 4; + *reinterpret_cast<uint16*>(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | + 0xf000; + } +} + +void I422ToARGB1555Row_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* dst_argb1555, + int width) { + uint8 b0; + uint8 g0; + uint8 r0; + uint8 b1; + uint8 g1; + uint8 r1; + for (int x = 0; x < width - 1; x += 2) { + YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + YuvPixel2(y_buf[1], u_buf[0], v_buf[0], &b1, &g1, &r1); + b0 = b0 >> 3; + g0 = g0 >> 3; + r0 = r0 >> 3; + b1 = b1 >> 3; + g1 = g1 >> 3; + r1 = r1 >> 3; + *reinterpret_cast<uint32*>(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | + (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; + y_buf += 2; + u_buf += 1; + v_buf += 1; + dst_argb1555 += 4; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel2(y_buf[0], u_buf[0], v_buf[0], &b0, &g0, &r0); + b0 = b0 >> 3; + g0 = g0 >> 3; + r0 = r0 >> 3; + *reinterpret_cast<uint16*>(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | + 0x8000; + } +} + void I422ToRGB565Row_C(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, @@ -1201,7 +1275,7 @@ void I422ToUYVYRow_C(const uint8* src_y, dst_frame[3] = src_y[0]; // duplicate last y } } - +#if !defined(YUV_DISABLE_ASM) #if defined(__x86_64__) || defined(__i386__) void I422ToRGB565Row_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -1212,18 +1286,51 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); ARGBToRGB565Row_SSE2(row, rgb_buf, width); } - -void I422ToRGB565Row_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - SIMD_ALIGNED(uint8 row[kMaxStride]); - I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); - ARGBToRGB565Row_SSE2(row, rgb_buf, width); -} #endif // defined(__x86_64__) || defined(__i386__) +#if defined(_M_IX86) || defined(__x86_64__) || 
defined(__i386__) +void 
I422ToARGB1555Row_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + SIMD_ALIGNED(uint8 row[kMaxStride]); + I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + ARGBToARGB1555Row_SSE2(row, rgb_buf, width); +} + +void I422ToARGB4444Row_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + SIMD_ALIGNED(uint8 row[kMaxStride]); + I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); + ARGBToARGB4444Row_SSE2(row, rgb_buf, width); +} +#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) +#if defined(__ARM_NEON__) +void I422ToARGB1555Row_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + SIMD_ALIGNED(uint8 row[kMaxStride]); + I422ToARGBRow_NEON(y_buf, u_buf, v_buf, row, width); + ARGBToARGB1555Row_NEON(row, rgb_buf, width); +} + +void I422ToARGB4444Row_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width) { + SIMD_ALIGNED(uint8 row[kMaxStride]); + I422ToARGBRow_NEON(y_buf, u_buf, v_buf, row, width); + ARGBToARGB4444Row_NEON(row, rgb_buf, width); +} +#endif // defined(__ARM_NEON__) +#endif // !defined(YUV_DISABLE_ASM) #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_win.cc b/source/row_win.cc index 6268f83f7..f0001cf80 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -1757,7 +1757,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf, } } -// 8 pixels, dest aligned 16. +// 8 pixels, dest unaligned. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
__declspec(naked) __declspec(align(16)) void I422ToRGB565Row_SSSE3(const uint8* y_buf, @@ -1820,7 +1820,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, por xmm1, xmm3 // BGR packssdw xmm0, xmm1 sub ecx, 8 - movdqa [edx], xmm0 // store 8 pixels of RGB565 + movdqu [edx], xmm0 // store 8 pixels of RGB565 lea edx, [edx + 16] jg convertloop @@ -2123,79 +2123,6 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } } -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I422ToRGB565Row_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb565_buf, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // rgb565 - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - pcmpeqb xmm5, xmm5 // generate mask 0x0000001f - psrld xmm5, 27 - pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 - psrld xmm6, 26 - pslld xmm6, 5 - pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 - pslld xmm7, 11 - - align 16 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RRGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm2 // RR - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRR first 4 pixels - punpckhwd xmm1, xmm2 // BGRR next 4 pixels - - // Step 3b: RRGB -> RGB565 - movdqa xmm3, xmm0 // B first 4 pixels of argb - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm3, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm3, xmm5 // B - pand xmm2, xmm6 // G - pand xmm0, xmm7 // R - por xmm3, xmm2 // BG - por xmm0, xmm3 // BGR - movdqa xmm3, xmm1 // B next 4 pixels of argb - movdqa xmm2, xmm1 // G - pslld xmm1, 8 // R - psrld xmm3, 3 // B - psrld xmm2, 5 // G - psrad xmm1, 16 // R - pand xmm3, xmm5 // B - pand xmm2, xmm6 // G - pand xmm1, xmm7 // R - por xmm3, xmm2 // BG - por xmm1, xmm3 // BGR - packssdw 
xmm0, xmm1 - sub ecx, 8 - movdqu [edx], xmm0 // store 8 pixels of RGB565 - lea edx, [edx + 16] - jg convertloop - - pop edi - pop esi - ret - } -} - // 8 pixels, dest aligned 16. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16))