mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 23:29:52 +08:00
Remove vmovdqa from UV code
BUG=181
TESTED=c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*ARGBToI420*
Review URL: https://webrtc-codereview.appspot.com/1091010
git-svn-id: http://libyuv.googlecode.com/svn/trunk@567 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
551d2b297e
commit
208280598e
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 566
|
Version: 567
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 566
|
#define LIBYUV_VERSION 567
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -797,10 +797,6 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
|
|||||||
lea edx, [edx + 32]
|
lea edx, [edx + 32]
|
||||||
jg convertloop
|
jg convertloop
|
||||||
ret
|
ret
|
||||||
vphaddw ymm0, ymm0, ymm1
|
|
||||||
vpermq ymm0, ymm0, 0xd8
|
|
||||||
vpackuswb ymm0, ymm0, ymm2
|
|
||||||
vpermq ymm0, ymm0, 0xd8
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOYROW_AVX2
|
#endif // HAS_ARGBTOYROW_AVX2
|
||||||
@ -1176,39 +1172,34 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
|
|||||||
vpavgb ymm2, ymm2, [eax + esi + 64]
|
vpavgb ymm2, ymm2, [eax + esi + 64]
|
||||||
vpavgb ymm3, ymm3, [eax + esi + 96]
|
vpavgb ymm3, ymm3, [eax + esi + 96]
|
||||||
lea eax, [eax + 128]
|
lea eax, [eax + 128]
|
||||||
vmovdqa ymm4, ymm0 // TODO(fbarchard): Remove.
|
vshufps ymm4, ymm0, ymm1, 0x88
|
||||||
vshufps ymm0, ymm0, ymm1, 0x88
|
vshufps ymm0, ymm0, ymm1, 0xdd
|
||||||
vshufps ymm4, ymm4, ymm1, 0xdd
|
|
||||||
vpavgb ymm0, ymm0, ymm4
|
vpavgb ymm0, ymm0, ymm4
|
||||||
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vmovdqa ymm4, ymm2 // TODO(fbarchard): Remove.
|
vshufps ymm4, ymm2, ymm3, 0x88
|
||||||
vshufps ymm2, ymm2, ymm3, 0x88
|
vshufps ymm2, ymm2, ymm3, 0xdd
|
||||||
vshufps ymm4, ymm4, ymm3, 0xdd
|
|
||||||
vpavgb ymm2, ymm2, ymm4
|
vpavgb ymm2, ymm2, ymm4
|
||||||
vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove.
|
vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove.
|
||||||
|
|
||||||
// step 2 - convert to U and V
|
// step 2 - convert to U and V
|
||||||
// from here down is very similar to Y code except
|
// from here down is very similar to Y code except
|
||||||
// instead of 32 different pixels, its 16 pixels of U and 16 of V
|
// instead of 32 different pixels, its 16 pixels of U and 16 of V
|
||||||
vmovdqa ymm1, ymm0 // TODO(fbarchard): Remove.
|
vpmaddubsw ymm1, ymm0, ymm7 // U
|
||||||
vmovdqa ymm3, ymm2 // TODO(fbarchard): Remove.
|
vpmaddubsw ymm3, ymm2, ymm7
|
||||||
vpmaddubsw ymm0, ymm0, ymm7 // U
|
vpmaddubsw ymm0, ymm0, ymm6 // V
|
||||||
vpmaddubsw ymm2, ymm2, ymm7
|
vpmaddubsw ymm2, ymm2, ymm6
|
||||||
vpmaddubsw ymm1, ymm1, ymm6 // V
|
|
||||||
vpmaddubsw ymm3, ymm3, ymm6
|
|
||||||
vphaddw ymm0, ymm0, ymm2
|
|
||||||
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
|
||||||
vphaddw ymm1, ymm1, ymm3
|
vphaddw ymm1, ymm1, ymm3
|
||||||
vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove.
|
vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vpsraw ymm0, ymm0, 8
|
vphaddw ymm0, ymm0, ymm2
|
||||||
|
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vpsraw ymm1, ymm1, 8
|
vpsraw ymm1, ymm1, 8
|
||||||
vpacksswb ymm0, ymm0, ymm1
|
vpsraw ymm0, ymm0, 8
|
||||||
|
vpacksswb ymm0, ymm1, ymm0
|
||||||
vpermq ymm0, ymm0, 0xd8
|
vpermq ymm0, ymm0, 0xd8
|
||||||
vpaddb ymm0, ymm0, ymm5 // -> unsigned
|
vpaddb ymm0, ymm0, ymm5 // -> unsigned
|
||||||
|
|
||||||
// step 3 - store 16 U and 16 V values
|
// step 3 - store 16 U and 16 V values
|
||||||
sub ecx, 32
|
sub ecx, 32
|
||||||
vmovdqa ymm1, ymm0
|
|
||||||
vextractf128 qword ptr [edx], ymm0, 0 // U
|
vextractf128 qword ptr [edx], ymm0, 0 // U
|
||||||
vextractf128 qword ptr [edx + edi], ymm0, 1 // V
|
vextractf128 qword ptr [edx + edi], ymm0, 1 // V
|
||||||
lea edx, [edx + 16]
|
lea edx, [edx + 16]
|
||||||
@ -1320,39 +1311,34 @@ void ARGBToUVRow_Unaligned_AVX2(const uint8* src_argb0, int src_stride_argb,
|
|||||||
vpavgb ymm2, ymm2, [eax + esi + 64]
|
vpavgb ymm2, ymm2, [eax + esi + 64]
|
||||||
vpavgb ymm3, ymm3, [eax + esi + 96]
|
vpavgb ymm3, ymm3, [eax + esi + 96]
|
||||||
lea eax, [eax + 128]
|
lea eax, [eax + 128]
|
||||||
vmovdqa ymm4, ymm0
|
vshufps ymm4, ymm0, ymm1, 0x88
|
||||||
vshufps ymm0, ymm0, ymm1, 0x88
|
vshufps ymm0, ymm0, ymm1, 0xdd
|
||||||
vshufps ymm4, ymm4, ymm1, 0xdd
|
|
||||||
vpavgb ymm0, ymm0, ymm4
|
vpavgb ymm0, ymm0, ymm4
|
||||||
vpermq ymm0, ymm0, 0xd8
|
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vmovdqa ymm4, ymm2
|
vshufps ymm4, ymm2, ymm3, 0x88
|
||||||
vshufps ymm2, ymm2, ymm3, 0x88
|
vshufps ymm2, ymm2, ymm3, 0xdd
|
||||||
vshufps ymm4, ymm4, ymm3, 0xdd
|
|
||||||
vpavgb ymm2, ymm2, ymm4
|
vpavgb ymm2, ymm2, ymm4
|
||||||
vpermq ymm2, ymm2, 0xd8
|
vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove.
|
||||||
|
|
||||||
// step 2 - convert to U and V
|
// step 2 - convert to U and V
|
||||||
// from here down is very similar to Y code except
|
// from here down is very similar to Y code except
|
||||||
// instead of 32 different pixels, its 16 pixels of U and 16 of V
|
// instead of 32 different pixels, its 16 pixels of U and 16 of V
|
||||||
vmovdqa ymm1, ymm0
|
vpmaddubsw ymm1, ymm0, ymm7 // U
|
||||||
vmovdqa ymm3, ymm2
|
vpmaddubsw ymm3, ymm2, ymm7
|
||||||
vpmaddubsw ymm0, ymm0, ymm7 // U
|
vpmaddubsw ymm0, ymm0, ymm6 // V
|
||||||
vpmaddubsw ymm2, ymm2, ymm7
|
vpmaddubsw ymm2, ymm2, ymm6
|
||||||
vpmaddubsw ymm1, ymm1, ymm6 // V
|
|
||||||
vpmaddubsw ymm3, ymm3, ymm6
|
|
||||||
vphaddw ymm0, ymm0, ymm2
|
|
||||||
vpermq ymm0, ymm0, 0xd8
|
|
||||||
vphaddw ymm1, ymm1, ymm3
|
vphaddw ymm1, ymm1, ymm3
|
||||||
vpermq ymm1, ymm1, 0xd8
|
vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vpsraw ymm0, ymm0, 8
|
vphaddw ymm0, ymm0, ymm2
|
||||||
|
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
|
||||||
vpsraw ymm1, ymm1, 8
|
vpsraw ymm1, ymm1, 8
|
||||||
vpacksswb ymm0, ymm0, ymm1
|
vpsraw ymm0, ymm0, 8
|
||||||
|
vpacksswb ymm0, ymm1, ymm0
|
||||||
vpermq ymm0, ymm0, 0xd8
|
vpermq ymm0, ymm0, 0xd8
|
||||||
vpaddb ymm0, ymm0, ymm5 // -> unsigned
|
vpaddb ymm0, ymm0, ymm5 // -> unsigned
|
||||||
|
|
||||||
// step 3 - store 16 U and 16 V values
|
// step 3 - store 16 U and 16 V values
|
||||||
sub ecx, 32
|
sub ecx, 32
|
||||||
vmovdqa ymm1, ymm0
|
|
||||||
vextractf128 qword ptr [edx], ymm0, 0 // U
|
vextractf128 qword ptr [edx], ymm0, 0 // U
|
||||||
vextractf128 qword ptr [edx + edi], ymm0, 1 // V
|
vextractf128 qword ptr [edx + edi], ymm0, 1 // V
|
||||||
lea edx, [edx + 16]
|
lea edx, [edx + 16]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user