Fix register order for ARGBToUVRow_AVX2 and re-enable HAS_ARGBTOUVROW_AVX2

BUG=269
TESTED=try bots
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/29249004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1200 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2014-12-15 18:07:09 +00:00
parent 11c3015712
commit f5f5d15dcd
5 changed files with 23 additions and 23 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1198 Version: 1200
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -189,7 +189,7 @@ extern "C" {
#define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2
//#define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX #define HAS_COPYROW_AVX

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1198 #define LIBYUV_VERSION 1200
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

View File

@ -1006,9 +1006,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
"+r"(dst_v), // %2 "+r"(dst_v), // %2
"+rm"(width) // %3 "+rm"(width) // %3
: "r"((intptr_t)(src_stride_argb)), // %4 : "r"((intptr_t)(src_stride_argb)), // %4
"m"(kARGBToV), // %5 "m"(kAddUV128), // %5
"m"(kARGBToU), // %6 "m"(kARGBToV), // %6
"m"(kAddUV128), // %7 "m"(kARGBToU), // %7
"m"(kShufARGBToUV_AVX) // %8 "m"(kShufARGBToUV_AVX) // %8
: "memory", "cc" : "memory", "cc"
#if defined(__native_client__) && defined(__x86_64__) #if defined(__native_client__) && defined(__x86_64__)

View File

@ -847,8 +847,8 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kBGRAToY movdqa xmm4, kBGRAToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
@ -880,8 +880,8 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kABGRToY movdqa xmm4, kABGRToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
@ -913,8 +913,8 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kRGBAToY movdqa xmm4, kRGBAToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
@ -951,9 +951,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1021,9 +1021,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kARGBToUJ
movdqa xmm6, kARGBToVJ
movdqa xmm5, kAddUVJ128 movdqa xmm5, kAddUVJ128
movdqa xmm6, kARGBToVJ
movdqa xmm7, kARGBToUJ
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1156,9 +1156,9 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix mov ecx, [esp + 4 + 16] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1213,9 +1213,9 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix mov ecx, [esp + 4 + 16] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1273,9 +1273,9 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kBGRAToU
movdqa xmm6, kBGRAToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kBGRAToV
movdqa xmm7, kBGRAToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1343,9 +1343,9 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kABGRToU
movdqa xmm6, kABGRToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kABGRToV
movdqa xmm7, kABGRToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
@ -1413,9 +1413,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kRGBAToU
movdqa xmm6, kRGBAToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kRGBAToV
movdqa xmm7, kRGBAToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop: