remove add 16 from ARGBToYJ and add rounding, for consistency with Windows version. row.h header macros sorted alphabetically.

BUG=269
TESTED=untested
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/32579005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1185 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2014-12-02 22:37:47 +00:00
parent b036cf700b
commit 540e8af80c
5 changed files with 151 additions and 141 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1184
Version: 1185
License: BSD
License File: LICENSE

View File

@ -57,38 +57,7 @@ extern "C" {
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2
// Conversions:
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
@ -105,6 +74,7 @@ extern "C" {
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
@ -113,9 +83,9 @@ extern "C" {
#define HAS_COPYROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
@ -153,6 +123,36 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBCOPYALPHAROW_SSE2
#define HAS_ARGBCOPYYTOALPHAROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBLUMACOLORTABLEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSE2
#define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBPOLYNOMIALROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#define HAS_INTERPOLATEROW_SSE2
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_RGBCOLORTABLEROW_X86
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2
#endif
// The following are available on x64 Visual C:
@ -184,33 +184,33 @@ extern "C" {
// The code supports NaCL but requires a new compiler and validator.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_COPYROW_AVX
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX
#define HAS_I422TOABGRROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_I422TOBGRAROW_AVX2
#define HAS_I422TOABGRROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_YUY2TOYROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2
// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#endif
@ -240,100 +240,101 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_SSSE3_ONLY)
#define HAS_ARGBBLENDROW_SSE2
#define HAS_ARGBATTENUATEROW_SSE2
#define HAS_ARGBBLENDROW_SSE2
#define HAS_MIRRORROW_SSE2
#endif
// The following are available on arm64 platforms:
// TODO(fbarchard): Merge arm64 and arm7 neon macros.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_I444TOARGBROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_COPYROW_NEON
#define HAS_SETROW_NEON
#define HAS_ARGBSETROWS_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_YUY2TOYROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSETROWS_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_ABGRTOUVROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#endif
// The following are available on Neon platforms:
@ -349,18 +350,18 @@ extern "C" {
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTOBAYERGGROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
@ -419,12 +420,12 @@ extern "C" {
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
#define HAS_INTERPOLATEROW_NEON
// TODO(fbarchard): Investigate neon unittest failure.
// #define HAS_ARGBCOLORMATRIXROW_NEON
#endif

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1184
#define LIBYUV_VERSION 1185
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -92,6 +92,7 @@ static uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
// 7 bit fixed point 0.5.
static vec16 kAddYJ64 = {
64, 64, 64, 64, 64, 64, 64, 64
};
@ -704,6 +705,7 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) {
#endif // HAS_RGB24TOARGBROW_SSSE3
#ifdef HAS_ARGBTOYROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"movdqa %3,%%xmm4 \n"
@ -743,6 +745,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
#endif // HAS_ARGBTOYROW_SSSE3
#ifdef HAS_ARGBTOYJROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"movdqa %3,%%xmm4 \n"
@ -788,6 +792,7 @@ static const lvec32 kPermdARGBToY_AVX = {
0, 4, 1, 5, 2, 6, 3, 7
};
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"vbroadcastf128 %3,%%ymm4 \n"
@ -804,13 +809,13 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"lea " MEMLEA(0x80,0) ",%0 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
"vpsrlw $0x7,%%ymm0,%%ymm0 \n"
"vpsrlw $0x7,%%ymm2,%%ymm2 \n"
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n"
"vpermd %%ymm0,%%ymm6,%%ymm0 \n"
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
@ -831,6 +836,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
#endif // HAS_ARGBTOYROW_AVX2
#ifdef HAS_ARGBTOYJROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
asm volatile (
"vbroadcastf128 %3,%%ymm4 \n"
@ -847,13 +853,14 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
"vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
"lea " MEMLEA(0x80,0) ",%0 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n"
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates.
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n"
"vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding.
"vpaddw %%ymm5,%%ymm2,%%ymm2 \n"
"vpsrlw $0x7,%%ymm0,%%ymm0 \n"
"vpsrlw $0x7,%%ymm2,%%ymm2 \n"
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n"
"vpermd %%ymm0,%%ymm6,%%ymm0 \n"
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate.
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20,%2 \n"
@ -863,7 +870,7 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
"+r"(dst_y), // %1
"+r"(pix) // %2
: "m"(kARGBToYJ), // %3
"m"(kAddY16), // %4
"m"(kAddYJ64), // %4
"m"(kPermdARGBToY_AVX) // %5
: "memory", "cc"
#if defined(__SSE2__)

View File

@ -210,6 +210,7 @@ static const uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
// 7 bit fixed point 0.5.
static const vec16 kAddYJ64 = {
64, 64, 64, 64, 64, 64, 64, 64
};
@ -697,8 +698,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kARGBToY
movdqa xmm5, kAddY16
convertloop:
movdqu xmm0, [eax]
@ -724,7 +725,8 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
}
}
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding.
__declspec(naked) __declspec(align(16))
void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
@ -787,7 +789,7 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
vpsrlw ymm2, ymm2, 7
vpackuswb ymm0, ymm0, ymm2 // mutates.
vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation.
vpaddb ymm0, ymm0, ymm5
vpaddb ymm0, ymm0, ymm5 // add 16 for Y
vmovdqu [edx], ymm0
lea edx, [edx + 32]
sub ecx, 32