diff --git a/README.chromium b/README.chromium index ef2a1bbcc..5928823f8 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 297 +Version: 298 License: BSD License File: LICENSE diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 496386c50..fe40f3a7f 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -15,7 +15,7 @@ // TODO(fbarchard): Remove the following headers includes #include "libyuv/convert.h" -#include "libyuv/planar_functions.h" +#include "libyuv/convert_argb.h" #ifdef __cplusplus namespace libyuv { @@ -188,11 +188,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Multiply ARGB image by ARGB value. -int ARGBShade(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, uint32 value); - // Convert MJPG to ARGB. int MJPGToARGB(const uint8* sample, size_t sample_size, uint8* argb, int argb_stride, @@ -212,6 +207,11 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, int32* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius); +// Multiply ARGB image by ARGB value. 
+int ARGBShade(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb, int dst_stride_argb, + int width, int height, uint32 value); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cd2291600..3f3f947bd 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 297 +#define LIBYUV_VERSION 298 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert.cc b/source/convert.cc index 963ac0334..d88884be1 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -784,11 +784,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, return 0; } -// Visual C for x86 defines these. -#if defined(_M_X64) || defined(_M_IX86) -#define LIBYUV_LITTLE_ENDIAN -// GCC provided macros. -#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN +// Visual C x86 or GCC little endian. +#if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \ + (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN)) #define LIBYUV_LITTLE_ENDIAN #endif diff --git a/source/convert_from.cc b/source/convert_from.cc index 64365d873..b026f82e7 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -401,18 +401,15 @@ static void I42xToUYVYRow_C(const uint8* src_y, } } -// Visual C for x86 defines these. -#if defined(_M_X64) || defined(_M_IX86) -#define LIBYUV_LITTLE_ENDIAN -// GCC provided macros. -#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN +// Visual C x86 or GCC little endian. 
+#if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \ + (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN)) #define LIBYUV_LITTLE_ENDIAN #endif #ifdef LIBYUV_LITTLE_ENDIAN #define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v #else - static inline void WRITEWORD(uint8* p, uint32 v) { p[0] = (uint8)(v & 255); p[1] = (uint8)((v >> 8) & 255); diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 1192ef768..17e191231 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -60,8 +60,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y, uint8*, int, uint8*, int, int width, int height) { - if (!src_y || !dst_y || - width <= 0 || height == 0) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -112,8 +111,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_y || !dst_u || !dst_v || + if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } @@ -143,9 +141,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y, int ARGBMirror(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height) { - if (!src_argb || - !dst_argb || - width <= 0 || height == 0) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
@@ -224,6 +220,9 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, int ARGBToI400(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, int width, int height) { + if (!src_argb || !dst_y || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -255,6 +254,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { + if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -298,6 +300,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb24, int dst_stride_rgb24, int width, int height) { + if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -330,6 +335,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, int ARGBToRAW(const uint8* src_argb, int src_stride_argb, uint8* dst_raw, int dst_stride_raw, int width, int height) { + if (!src_argb || !dst_raw || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -362,6 +370,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height) { + if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -393,6 +404,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, uint8* dst_argb1555, int 
dst_stride_argb1555, int width, int height) { + if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -424,6 +438,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, uint8* dst_argb4444, int dst_stride_argb4444, int width, int height) { + if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -457,6 +474,9 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height) { + if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { + return -1; + } // Negative height means invert the image. if (height < 0) { height = -height; @@ -499,6 +519,9 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, const uint8* src_vu, int src_stride_vu, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height) { + if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) { + return -1; + } // Negative height means invert the image. 
if (height < 0) { height = -height; @@ -762,6 +785,9 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -796,6 +822,9 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { + return -1; + } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; @@ -866,7 +895,8 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb, int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, const int8* matrix_argb, int dst_x, int dst_y, int width, int height) { - if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { + if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 || + dst_x < 0 || dst_y < 0) { return -1; } void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb, @@ -890,7 +920,8 @@ int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, const uint8* table_argb, int dst_x, int dst_y, int width, int height) { - if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { + if (!dst_argb || !table_argb || width <= 0 || height <= 0 || + dst_x < 0 || dst_y < 0) { return -1; } void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, @@ -972,6 +1003,9 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int32* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { + return -1; + } void (*ComputeCumulativeSumRow)(const uint8* 
row, int32* cumsum, const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft, @@ -1052,6 +1086,30 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, int ARGBShade(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height, uint32 value) { + if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { + return -1; // Null buffer, empty size or zero shade value. + } + if (height < 0) { // Negative height means invert the image. + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, + int width, uint32 value) = ARGBShadeRow_C; // C fallback. +#if defined(HAS_ARGBSHADE_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + ARGBShadeRow = ARGBShadeRow_SSE2; // Needs width % 4 == 0, aligned rows. + } +#endif + + for (int y = 0; y < height; ++y) { // Shade one row per iteration. + ARGBShadeRow(src_argb, dst_argb, width, value); + src_argb += src_stride_argb; + dst_argb += dst_stride_argb; + } + return 0; } #ifdef __cplusplus diff --git a/source/row.h b/source/row.h index 5b5dc4675..bc59694a6 100644 --- a/source/row.h +++ b/source/row.h @@ -85,6 +85,7 @@ extern "C" { // The following are Windows only: #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #define HAS_ARGBCOLORTABLEROW_X86 +#define HAS_ARGBSHADE_SSE2 #endif // The following are disabled when SSSE3 is available: @@ -516,6 +517,11 @@ void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft, void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, const int32* previous_cumsum, int width); +void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, + uint32 value); +void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, + uint32 value); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git 
a/source/row_common.cc b/source/row_common.cc index 63fe3818e..7a431e732 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -956,6 +956,32 @@ void CumulativeSumToAverage_C(const int32* tl, const int32* bl, } } +#define REPEAT8(v) ((v) | ((v) << 8)) // 0xAB -> 0xABAB. +#define SHADE(f, v) (((v) * (f)) >> 24) // High byte of 32 bit product. + +void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, + uint32 value) { // value packs A, R, G, B scales, high to low byte. + const uint32 b_scale = REPEAT8(value & 0xff); + const uint32 g_scale = REPEAT8((value >> 8) & 0xff); + const uint32 r_scale = REPEAT8((value >> 16) & 0xff); + const uint32 a_scale = REPEAT8(value >> 24); + + for (int i = 0; i < width; ++i) { // One 4 byte pixel per iteration. + const uint32 b = REPEAT8(src_argb[0]); + const uint32 g = REPEAT8(src_argb[1]); + const uint32 r = REPEAT8(src_argb[2]); + const uint32 a = REPEAT8(src_argb[3]); + dst_argb[0] = SHADE(b, b_scale); + dst_argb[1] = SHADE(g, g_scale); + dst_argb[2] = SHADE(r, r_scale); + dst_argb[3] = SHADE(a, a_scale); + src_argb += 4; + dst_argb += 4; + } +} +#undef REPEAT8 +#undef SHADE + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_win.cc b/source/row_win.cc index 82ad2f365..3921c8e33 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -15,12 +15,12 @@ namespace libyuv { extern "C" { #endif -// This module is for Visual C x86 +// This module is for Visual C x86. #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #ifdef HAS_ARGBTOYROW_SSSE3 -// Constants for ARGB +// Constants for ARGB. static const vec8 kARGBToY = { 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 }; @@ -33,7 +33,7 @@ static const vec8 kARGBToV = { -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, }; -// Constants for BGRA +// Constants for BGRA. static const vec8 kBGRAToY = { 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 }; @@ -46,7 +46,7 @@ static const vec8 kBGRAToV = { 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 }; -// Constants for ABGR +// Constants for ABGR. 
static const vec8 kABGRToY = { 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 }; @@ -247,13 +247,13 @@ __asm { } } -// pmul method to replicate bits -// Math to replicate bits +// pmul method to replicate bits. +// Math to replicate bits: // (v << 8) | (v << 3) // v * 256 + v * 8 // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 -// 20 instructions +// 20 instructions. __declspec(naked) __declspec(align(16)) void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix) { @@ -358,7 +358,7 @@ __asm { } } -// 18 instructions +// 18 instructions. __declspec(naked) __declspec(align(16)) void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, int pix) { @@ -514,7 +514,7 @@ __asm { } } -// TODO(fbarchard): Improve sign extension/packing +// TODO(fbarchard): Improve sign extension/packing. __declspec(naked) __declspec(align(16)) void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { @@ -587,7 +587,7 @@ __asm { } } -// Convert 16 ARGB pixels (64 bytes) to 16 Y values +// Convert 16 ARGB pixels (64 bytes) to 16 Y values. __declspec(naked) __declspec(align(16)) void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { __asm { @@ -1249,8 +1249,9 @@ static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG }; static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; // TODO(fbarchard): NV12/NV21 fetch UV and use directly. +// TODO(fbarchard): Read that does half size on Y and treats 420 as 444. -// Read 8 UV from 411 +// Read 8 UV from 444. #define READYUV444 __asm { \ __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \ __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \ @@ -1258,7 +1259,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; __asm punpcklbw xmm0, xmm1 /* UV */ \ } -// Read 4 UV from 422, upsample to 8 UV +// Read 4 UV from 422, upsample to 8 UV. 
#define READYUV422 __asm { \ __asm movd xmm0, [esi] /* U */ \ __asm movd xmm1, [esi + edi] /* V */ \ @@ -1267,7 +1268,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ } -// Read 2 UV from 411, upsample to 8 UV +// Read 2 UV from 411, upsample to 8 UV. #define READYUV411 __asm { \ __asm movd xmm0, [esi] /* U */ \ __asm movd xmm1, [esi + edi] /* V */ \ @@ -1277,14 +1278,14 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \ } -// Read 4 UV from NV12, upsample to 8 UV +// Read 4 UV from NV12, upsample to 8 UV. #define READNV12 __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \ __asm lea esi, [esi + 8] \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ } -// Convert 8 pixels: 8 UV and 8 Y +// Convert 8 pixels: 8 UV and 8 Y. #define YUVTORGB __asm { \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ __asm movdqa xmm1, xmm0 \ @@ -1312,7 +1313,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; __asm packuswb xmm2, xmm2 /* R */ \ } -// Convert 8 pixels: 8 VU and 8 Y +// Convert 8 pixels: 8 VU and 8 Y. #define YVUTORGB __asm { \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ __asm movdqa xmm1, xmm0 \ @@ -1341,7 +1342,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; } // 8 pixels, dest aligned 16. -// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes) +// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void I444ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -1384,7 +1385,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, } // 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
__declspec(naked) __declspec(align(16)) void I422ToARGBRow_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -1427,7 +1428,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, } // 8 pixels, dest aligned 16. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // Similar to I420 but duplicate UV once more. __declspec(naked) __declspec(align(16)) void I411ToARGBRow_SSSE3(const uint8* y_buf, @@ -1471,7 +1472,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, } // 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, @@ -1509,7 +1510,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, } // 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_SSSE3(const uint8* y_buf, const uint8* uv_buf, @@ -1547,7 +1548,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf, } // 8 pixels, unaligned. -// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes) +// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -1590,7 +1591,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } // 8 pixels, unaligned. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
__declspec(naked) __declspec(align(16)) void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* u_buf, @@ -1633,7 +1634,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } // 8 pixels, unaligned. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // Similar to I420 but duplicate UV once more. __declspec(naked) __declspec(align(16)) void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, @@ -1678,7 +1679,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, // 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, @@ -1716,7 +1717,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, } // 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) +// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) __declspec(align(16)) void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, const uint8* uv_buf, @@ -2127,7 +2128,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { #endif // HAS_SPLITUV_SSE2 #ifdef HAS_COPYROW_SSE2 -// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time +// CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time. 
__declspec(naked) __declspec(align(16)) void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { __asm { @@ -2574,13 +2575,13 @@ static const uvec8 kShuffleAlpha = { 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 }; -// Same as SSE2, but replaces +// Same as SSE2, but replaces: // psrlw xmm3, 8 // alpha // pshufhw xmm3, xmm3,0F5h // 8 alpha words // pshuflw xmm3, xmm3,0F5h // with.. // pshufb xmm3, kShuffleAlpha // alpha -// Blend 8 pixels at a time +// Blend 8 pixels at a time. __declspec(naked) __declspec(align(16)) void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, @@ -2698,7 +2699,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, #ifdef HAS_ARGBATTENUATE_SSE2 // Attenuate 4 pixels at a time. -// aligned to 16 bytes +// Aligned to 16 bytes. __declspec(naked) __declspec(align(16)) void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { __asm { @@ -2741,7 +2742,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { #endif // HAS_ARGBATTENUATE_SSE2 #ifdef HAS_ARGBATTENUATE_SSSE3 -// Shuffle table duplicating alpha +// Shuffle table duplicating alpha. static const uvec8 kShuffleAlpha0 = { 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, }; @@ -2791,7 +2792,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { #ifdef HAS_ARGBUNATTENUATE_SSE2 // Unattenuate 4 pixels at a time. -// aligned to 16 bytes +// Aligned to 16 bytes. __declspec(naked) __declspec(align(16)) void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { @@ -2845,12 +2846,12 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, #endif // HAS_ARGBUNATTENUATE_SSE2 #ifdef HAS_ARGBGRAYROW_SSSE3 -// Constant for ARGB color to gray scale. 
0.11 * B + 0.59 * G + 0.30 * R +// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R static const vec8 kARGBToGray = { 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0 }; -// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels +// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. __declspec(naked) __declspec(align(16)) void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { __asm { @@ -2893,7 +2894,7 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { // b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 -// Constant for ARGB color to sepia tone +// Constant for ARGB color to sepia tone. static const vec8 kARGBToSepiaB = { 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 }; @@ -3071,7 +3072,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). -// aligned to 16 bytes +// Aligned to 16 bytes. __declspec(naked) __declspec(align(16)) void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width) { @@ -3306,6 +3307,42 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, } #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 +#ifdef HAS_ARGBSHADE_SSE2 +// Shade 4 pixels at a time by specified value. +// Aligned to 16 bytes. 
+__declspec(naked) __declspec(align(16)) +void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, + uint32 value) { + __asm { + mov eax, [esp + 4] // src_argb + mov edx, [esp + 8] // dst_argb + mov ecx, [esp + 12] // width + movd xmm2, [esp + 16] // value + sub edx, eax // edx = dst - src + punpcklbw xmm2, xmm2 // value bytes to words + punpcklqdq xmm2, xmm2 // same scales for 2 pixels + + align 16 + convertloop: + movdqa xmm0, [eax] // read 4 pixels + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm0 // first 2 + punpckhbw xmm1, xmm1 // next 2 + pmulhuw xmm0, xmm2 // argb * value + pmulhuw xmm1, xmm2 // argb * value + psrlw xmm0, 8 // keep high byte + psrlw xmm1, 8 // keep high byte + packuswb xmm0, xmm1 // back to 4 pixels + sub ecx, 4 + movdqa [eax + edx], xmm0 // write 4 pixels + lea eax, [eax + 16] + jg convertloop + + ret + } +} +#endif // HAS_ARGBSHADE_SSE2 + #endif // _M_IX86 diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index ca5402283..384c3e0be 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -331,7 +331,6 @@ TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4) TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4) TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4) - TEST_F(libyuvTest, TestAttenuate) { SIMD_ALIGNED(uint8 orig_pixels[256][4]); SIMD_ALIGNED(uint8 atten_pixels[256][4]); @@ -649,4 +648,56 @@ TEST_F(libyuvTest, TestARGBMirror) { } } +TEST_F(libyuvTest, TestShade) { + SIMD_ALIGNED(uint8 orig_pixels[256][4]); + SIMD_ALIGNED(uint8 shade_pixels[256][4]); + + // Pixel 0: distinct non-zero color and alpha values. + orig_pixels[0][0] = 10u; + orig_pixels[0][1] = 20u; + orig_pixels[0][2] = 40u; + orig_pixels[0][3] = 80u; + // Pixels 1-3: black with opaque (255) or transparent (0) alpha. + orig_pixels[1][0] = 0u; + orig_pixels[1][1] = 0u; + orig_pixels[1][2] = 0u; + orig_pixels[1][3] = 255u; + orig_pixels[2][0] = 0u; + orig_pixels[2][1] = 0u; + orig_pixels[2][2] = 0u; + orig_pixels[2][3] = 0u; + orig_pixels[3][0] = 0u; + orig_pixels[3][1] = 0u; + orig_pixels[3][2] = 0u; + orig_pixels[3][3] = 0u; + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80ffffff); + EXPECT_EQ(10u, shade_pixels[0][0]); + 
EXPECT_EQ(20u, shade_pixels[0][1]); + EXPECT_EQ(40u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); + EXPECT_EQ(0u, shade_pixels[1][0]); + EXPECT_EQ(0u, shade_pixels[1][1]); + EXPECT_EQ(0u, shade_pixels[1][2]); + EXPECT_EQ(128u, shade_pixels[1][3]); + EXPECT_EQ(0u, shade_pixels[2][0]); + EXPECT_EQ(0u, shade_pixels[2][1]); + EXPECT_EQ(0u, shade_pixels[2][2]); + EXPECT_EQ(0u, shade_pixels[2][3]); + EXPECT_EQ(0u, shade_pixels[3][0]); + EXPECT_EQ(0u, shade_pixels[3][1]); + EXPECT_EQ(0u, shade_pixels[3][2]); + EXPECT_EQ(0u, shade_pixels[3][3]); + + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80808080); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(10u, shade_pixels[0][1]); + EXPECT_EQ(20u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); + + for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) { + ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 256, 1, + 0x80808080); // Benchmark only; output is not checked. + } +} + } // namespace libyuv