mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Remove the add-16 offset from ARGBToYJ and add rounding, for consistency with the Windows version. Sort the row.h header macros alphabetically.
BUG=269 TESTED=untested R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/32579005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1185 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
b036cf700b
commit
540e8af80c
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1184
|
||||
Version: 1185
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -57,38 +57,7 @@ extern "C" {
|
||||
// The following are available on all x86 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_SSE2
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||
#define HAS_ARGBBLENDROW_SSSE3
|
||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBCOPYALPHAROW_SSE2
|
||||
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
|
||||
#define HAS_ARGBGRAYROW_SSSE3
|
||||
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
|
||||
#define HAS_ARGBMIRRORROW_SSE2
|
||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
||||
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
#define HAS_ARGBQUANTIZEROW_SSE2
|
||||
#define HAS_ARGBSEPIAROW_SSSE3
|
||||
#define HAS_ARGBSHADEROW_SSE2
|
||||
#define HAS_ARGBSUBTRACTROW_SSE2
|
||||
#define HAS_ARGBUNATTENUATEROW_SSE2
|
||||
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||
#define HAS_INTERPOLATEROW_SSE2
|
||||
#define HAS_INTERPOLATEROW_SSSE3
|
||||
#define HAS_RGBCOLORTABLEROW_X86
|
||||
#define HAS_SOBELROW_SSE2
|
||||
#define HAS_SOBELTOPLANEROW_SSE2
|
||||
#define HAS_SOBELXROW_SSE2
|
||||
#define HAS_SOBELXYROW_SSE2
|
||||
#define HAS_SOBELYROW_SSE2
|
||||
|
||||
// Conversions:
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ABGRTOUVROW_SSSE3
|
||||
#define HAS_ABGRTOYROW_SSSE3
|
||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||
@ -105,6 +74,7 @@ extern "C" {
|
||||
#define HAS_ARGBTOUV422ROW_SSSE3
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
#define HAS_ARGBTOUVJROW_SSSE3
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ARGBTOYJROW_SSSE3
|
||||
#define HAS_ARGBTOYROW_SSSE3
|
||||
#define HAS_BGRATOUVROW_SSSE3
|
||||
@ -113,9 +83,9 @@ extern "C" {
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOABGRROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
#define HAS_I422TOARGB4444ROW_SSSE3
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#define HAS_I422TOBGRAROW_SSSE3
|
||||
@ -153,6 +123,36 @@ extern "C" {
|
||||
#define HAS_YUY2TOUV422ROW_SSE2
|
||||
#define HAS_YUY2TOUVROW_SSE2
|
||||
#define HAS_YUY2TOYROW_SSE2
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_SSE2
|
||||
#define HAS_ARGBAFFINEROW_SSE2
|
||||
#define HAS_ARGBATTENUATEROW_SSSE3
|
||||
#define HAS_ARGBBLENDROW_SSSE3
|
||||
#define HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||
#define HAS_ARGBCOLORTABLEROW_X86
|
||||
#define HAS_ARGBCOPYALPHAROW_SSE2
|
||||
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
|
||||
#define HAS_ARGBGRAYROW_SSSE3
|
||||
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
|
||||
#define HAS_ARGBMIRRORROW_SSE2
|
||||
#define HAS_ARGBMULTIPLYROW_SSE2
|
||||
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
#define HAS_ARGBQUANTIZEROW_SSE2
|
||||
#define HAS_ARGBSEPIAROW_SSSE3
|
||||
#define HAS_ARGBSHADEROW_SSE2
|
||||
#define HAS_ARGBSUBTRACTROW_SSE2
|
||||
#define HAS_ARGBUNATTENUATEROW_SSE2
|
||||
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
|
||||
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||
#define HAS_INTERPOLATEROW_SSE2
|
||||
#define HAS_INTERPOLATEROW_SSSE3
|
||||
#define HAS_RGBCOLORTABLEROW_X86
|
||||
#define HAS_SOBELROW_SSE2
|
||||
#define HAS_SOBELTOPLANEROW_SSE2
|
||||
#define HAS_SOBELXROW_SSE2
|
||||
#define HAS_SOBELXYROW_SSE2
|
||||
#define HAS_SOBELYROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on x64 Visual C:
|
||||
@ -184,33 +184,33 @@ extern "C" {
|
||||
// The code supports NaCL but requires a new compiler and validator.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
|
||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#define HAS_COPYROW_AVX
|
||||
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||
#define HAS_ARGBCOPYALPHAROW_AVX2
|
||||
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
|
||||
#define HAS_ARGBMIRRORROW_AVX2
|
||||
#define HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_COPYROW_AVX
|
||||
#define HAS_I422TOABGRROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
#define HAS_I422TOBGRAROW_AVX2
|
||||
#define HAS_I422TOABGRROW_AVX2
|
||||
#define HAS_I422TORGBAROW_AVX2
|
||||
#define HAS_YUY2TOYROW_AVX2
|
||||
#define HAS_YUY2TOUV422ROW_AVX2
|
||||
#define HAS_YUY2TOUVROW_AVX2
|
||||
#define HAS_UYVYTOYROW_AVX2
|
||||
#define HAS_UYVYTOUV422ROW_AVX2
|
||||
#define HAS_UYVYTOUVROW_AVX2
|
||||
#define HAS_SPLITUVROW_AVX2
|
||||
#define HAS_MERGEUVROW_AVX2
|
||||
#define HAS_MIRRORROW_AVX2
|
||||
#define HAS_ARGBMIRRORROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_SPLITUVROW_AVX2
|
||||
#define HAS_UYVYTOUV422ROW_AVX2
|
||||
#define HAS_UYVYTOUVROW_AVX2
|
||||
#define HAS_UYVYTOYROW_AVX2
|
||||
#define HAS_YUY2TOUV422ROW_AVX2
|
||||
#define HAS_YUY2TOUVROW_AVX2
|
||||
#define HAS_YUY2TOYROW_AVX2
|
||||
|
||||
// Effects:
|
||||
#define HAS_ARGBADDROW_AVX2
|
||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||
#define HAS_ARGBATTENUATEROW_AVX2
|
||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||
#endif
|
||||
|
||||
@ -240,100 +240,101 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!defined(LIBYUV_SSSE3_ONLY)
|
||||
#define HAS_ARGBBLENDROW_SSE2
|
||||
#define HAS_ARGBATTENUATEROW_SSE2
|
||||
#define HAS_ARGBBLENDROW_SSE2
|
||||
#define HAS_MIRRORROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on arm64 platforms:
|
||||
// TODO(fbarchard): Merge arm64 and arm7 neon macros.
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TOABGRROW_NEON
|
||||
#define HAS_I422TORGBAROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB565ROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_YTOARGBROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_NV12TOARGBROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_NV12TORGB565ROW_NEON
|
||||
#define HAS_NV21TORGB565ROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_SPLITUVROW_NEON
|
||||
#define HAS_MERGEUVROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_ARGBSETROWS_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
#define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_RGB24TOARGBROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
#define HAS_RGB565TOARGBROW_NEON
|
||||
#define HAS_ABGRTOUVROW_NEON
|
||||
#define HAS_ABGRTOYROW_NEON
|
||||
#define HAS_ARGB1555TOARGBROW_NEON
|
||||
#define HAS_ARGB1555TOUVROW_NEON
|
||||
#define HAS_ARGB1555TOYROW_NEON
|
||||
#define HAS_ARGB4444TOARGBROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_YUY2TOYROW_NEON
|
||||
#define HAS_UYVYTOYROW_NEON
|
||||
#define HAS_YUY2TOUV422ROW_NEON
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
||||
#define HAS_ARGB4444TOUVROW_NEON
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#define HAS_ARGBADDROW_NEON
|
||||
#define HAS_ARGBATTENUATEROW_NEON
|
||||
#define HAS_ARGBBLENDROW_NEON
|
||||
#define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#define HAS_ARGBGRAYROW_NEON
|
||||
#define HAS_ARGBMIRRORROW_NEON
|
||||
#define HAS_ARGBMULTIPLYROW_NEON
|
||||
#define HAS_ARGBQUANTIZEROW_NEON
|
||||
#define HAS_ARGBSEPIAROW_NEON
|
||||
#define HAS_ARGBSETROWS_NEON
|
||||
#define HAS_ARGBSHADEROW_NEON
|
||||
#define HAS_ARGBSHUFFLEROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
#define HAS_ABGRTOUVROW_NEON
|
||||
#define HAS_RGBATOUVROW_NEON
|
||||
#define HAS_RGB24TOUVROW_NEON
|
||||
#define HAS_RAWTOUVROW_NEON
|
||||
#define HAS_RGB565TOUVROW_NEON
|
||||
#define HAS_ARGB1555TOUVROW_NEON
|
||||
#define HAS_ARGB4444TOUVROW_NEON
|
||||
#define HAS_RGB565TOYROW_NEON
|
||||
#define HAS_ARGB1555TOYROW_NEON
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_ABGRTOYROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
#define HAS_RGB24TOYROW_NEON
|
||||
#define HAS_RAWTOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422TOABGRROW_NEON
|
||||
#define HAS_I422TOARGB1555ROW_NEON
|
||||
#define HAS_I422TOARGB4444ROW_NEON
|
||||
#define HAS_I422TOARGBROW_NEON
|
||||
#define HAS_I422TOBGRAROW_NEON
|
||||
#define HAS_I422TORAWROW_NEON
|
||||
#define HAS_I422TORGB24ROW_NEON
|
||||
#define HAS_I422TORGB565ROW_NEON
|
||||
#define HAS_I422TORGBAROW_NEON
|
||||
#define HAS_I422TOUYVYROW_NEON
|
||||
#define HAS_I422TOYUY2ROW_NEON
|
||||
#define HAS_I444TOARGBROW_NEON
|
||||
#define HAS_INTERPOLATEROW_NEON
|
||||
#define HAS_ARGBBLENDROW_NEON
|
||||
#define HAS_ARGBATTENUATEROW_NEON
|
||||
#define HAS_ARGBQUANTIZEROW_NEON
|
||||
#define HAS_ARGBSHADEROW_NEON
|
||||
#define HAS_ARGBGRAYROW_NEON
|
||||
#define HAS_ARGBSEPIAROW_NEON
|
||||
#define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#define HAS_ARGBMULTIPLYROW_NEON
|
||||
#define HAS_ARGBADDROW_NEON
|
||||
#define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_MERGEUVROW_NEON
|
||||
#define HAS_MIRRORROW_NEON
|
||||
#define HAS_MIRRORUVROW_NEON
|
||||
#define HAS_NV12TOARGBROW_NEON
|
||||
#define HAS_NV12TORGB565ROW_NEON
|
||||
#define HAS_NV21TOARGBROW_NEON
|
||||
#define HAS_NV21TORGB565ROW_NEON
|
||||
#define HAS_RAWTOARGBROW_NEON
|
||||
#define HAS_RAWTOUVROW_NEON
|
||||
#define HAS_RAWTOYROW_NEON
|
||||
#define HAS_RGB24TOARGBROW_NEON
|
||||
#define HAS_RGB24TOUVROW_NEON
|
||||
#define HAS_RGB24TOYROW_NEON
|
||||
#define HAS_RGB565TOARGBROW_NEON
|
||||
#define HAS_RGB565TOUVROW_NEON
|
||||
#define HAS_RGB565TOYROW_NEON
|
||||
#define HAS_RGBATOUVROW_NEON
|
||||
#define HAS_RGBATOYROW_NEON
|
||||
#define HAS_SETROW_NEON
|
||||
#define HAS_SOBELROW_NEON
|
||||
#define HAS_SOBELTOPLANEROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELXROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELYROW_NEON
|
||||
#define HAS_SPLITUVROW_NEON
|
||||
#define HAS_UYVYTOARGBROW_NEON
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_UYVYTOYROW_NEON
|
||||
#define HAS_YTOARGBROW_NEON
|
||||
#define HAS_YUY2TOARGBROW_NEON
|
||||
#define HAS_YUY2TOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_YUY2TOYROW_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
@ -349,18 +350,18 @@ extern "C" {
|
||||
#define HAS_ARGB4444TOYROW_NEON
|
||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTORAWROW_NEON
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOUV411ROW_NEON
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
@ -419,12 +420,12 @@ extern "C" {
|
||||
#define HAS_ARGBSEPIAROW_NEON
|
||||
#define HAS_ARGBSHADEROW_NEON
|
||||
#define HAS_ARGBSUBTRACTROW_NEON
|
||||
#define HAS_INTERPOLATEROW_NEON
|
||||
#define HAS_SOBELROW_NEON
|
||||
#define HAS_SOBELTOPLANEROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELXROW_NEON
|
||||
#define HAS_SOBELXYROW_NEON
|
||||
#define HAS_SOBELYROW_NEON
|
||||
#define HAS_INTERPOLATEROW_NEON
|
||||
// TODO(fbarchard): Investigate neon unittest failure.
|
||||
// #define HAS_ARGBCOLORMATRIXROW_NEON
|
||||
#endif
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1184
|
||||
#define LIBYUV_VERSION 1185
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -92,6 +92,7 @@ static uvec8 kAddY16 = {
|
||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
|
||||
};
|
||||
|
||||
// 7 bit fixed point 0.5.
|
||||
static vec16 kAddYJ64 = {
|
||||
64, 64, 64, 64, 64, 64, 64, 64
|
||||
};
|
||||
@ -704,6 +705,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
|
||||
#endif // HAS_RGB24TOARGBROW_SSSE3
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
|
||||
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
@ -743,6 +745,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
#endif // HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
#ifdef HAS_ARGBTOYJROW_SSSE3
|
||||
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
|
||||
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
|
||||
void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
@ -788,6 +792,7 @@ static const lvec32 kPermdARGBToY_AVX = {
|
||||
0, 4, 1, 5, 2, 6, 3, 7
|
||||
};
|
||||
|
||||
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
|
||||
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vbroadcastf128 %3,%%ymm4 \n"
|
||||
@ -804,13 +809,13 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||
"lea " MEMLEA(0x80,0) ",%0 \n"
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
|
||||
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
|
||||
"vpsrlw $0x7,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x7,%%ymm2,%%ymm2 \n"
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n"
|
||||
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
|
||||
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
@ -831,6 +836,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
#endif // HAS_ARGBTOYROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBTOYJROW_AVX2
|
||||
// Convert 32 ARGB pixels (128 bytes) to 32 YJ values.
|
||||
void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vbroadcastf128 %3,%%ymm4 \n"
|
||||
@ -847,13 +853,14 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
|
||||
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
|
||||
"lea " MEMLEA(0x80,0) ",%0 \n"
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
|
||||
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
|
||||
"vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
|
||||
"vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
|
||||
"vpsrlw $0x7,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $0x7,%%ymm2,%%ymm2 \n"
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n"
|
||||
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
|
||||
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
@ -863,7 +870,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(pix) // %2
|
||||
: "m"(kARGBToYJ), // %3
|
||||
"m"(kAddY16), // %4
|
||||
"m"(kAddYJ64), // %4
|
||||
"m"(kPermdARGBToY_AVX) // %5
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
|
||||
@ -210,6 +210,7 @@ static const uvec8 kAddY16 = {
|
||||
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
|
||||
};
|
||||
|
||||
// 7 bit fixed point 0.5.
|
||||
static const vec16 kAddYJ64 = {
|
||||
64, 64, 64, 64, 64, 64, 64, 64
|
||||
};
|
||||
@ -697,8 +698,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
mov eax, [esp + 4] /* src_argb */
|
||||
mov edx, [esp + 8] /* dst_y */
|
||||
mov ecx, [esp + 12] /* pix */
|
||||
movdqa xmm5, kAddY16
|
||||
movdqa xmm4, kARGBToY
|
||||
movdqa xmm5, kAddY16
|
||||
|
||||
convertloop:
|
||||
movdqu xmm0, [eax]
|
||||
@ -724,7 +725,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
}
|
||||
}
|
||||
|
||||
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
|
||||
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
|
||||
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
__asm {
|
||||
@ -787,7 +789,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
vpsrlw ymm2, ymm2, 7
|
||||
vpackuswb ymm0, ymm0, ymm2 // mutates.
|
||||
vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
|
||||
vpaddb ymm0, ymm0, ymm5
|
||||
vpaddb ymm0, ymm0, ymm5 // add 16 for Y
|
||||
vmovdqu [edx], ymm0
|
||||
lea edx, [edx + 32]
|
||||
sub ecx, 32
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user