mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Report of affine to nacl using %k0
BUG=none TEST=none R=johannkoenig@google.com Review URL: https://webrtc-codereview.appspot.com/3929004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@855 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
e8c74b61d3
commit
a0630d77f0
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 855
|
Version: 856
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 855
|
#define LIBYUV_VERSION 856
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -5230,11 +5230,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
|||||||
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||||
|
|
||||||
#ifdef HAS_ARGBAFFINEROW_SSE2
|
#ifdef HAS_ARGBAFFINEROW_SSE2
|
||||||
// TODO(fbarchard): Find 64 bit way to avoid masking.
|
|
||||||
// Copy ARGB pixels from source image with slope to a row of destination.
|
// Copy ARGB pixels from source image with slope to a row of destination.
|
||||||
// Caveat - in 64 bit, movd is used with 64 bit gpr due to Mac gcc producing
|
|
||||||
// an error if movq is used. movd %%xmm0,%1
|
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||||
uint8* dst_argb, const float* src_dudv, int width) {
|
uint8* dst_argb, const float* src_dudv, int width) {
|
||||||
@ -5263,56 +5259,24 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
|||||||
// 4 pixel loop \n"
|
// 4 pixel loop \n"
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"40: \n"
|
"40: \n"
|
||||||
"cvttps2dq %%xmm2,%%xmm0 \n"
|
"cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2
|
||||||
"cvttps2dq %%xmm3,%%xmm1 \n"
|
"cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2
|
||||||
"packssdw %%xmm1,%%xmm0 \n"
|
"packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
|
||||||
"pmaddwd %%xmm5,%%xmm0 \n"
|
"pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride
|
||||||
#if defined(__x86_64__)
|
"movd %%xmm0,%k1 \n"
|
||||||
// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl.
|
|
||||||
"movd %%xmm0,%1 \n"
|
|
||||||
"mov %1,%5 \n"
|
|
||||||
"and $0x0fffffff,%1 \n"
|
|
||||||
"shr $32,%5 \n"
|
|
||||||
"pshufd $0xEE,%%xmm0,%%xmm0 \n"
|
|
||||||
#else
|
|
||||||
"movd %%xmm0,%1 \n"
|
|
||||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||||
"movd %%xmm0,%5 \n"
|
"movd %%xmm0,%k5 \n"
|
||||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||||
#endif
|
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
|
||||||
#if defined(__x86_64__) && defined(__native_client__)
|
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
|
||||||
BUNDLEALIGN
|
|
||||||
"lea (%q0,%q1,1),%%r14d \n"
|
|
||||||
"movd (%%r15,%%r14,1),%%xmm1 \n"
|
|
||||||
"lea (%q0,%q5,1),%%r14d \n"
|
|
||||||
"movd (%%r15,%%r14,1),%%xmm6 \n"
|
|
||||||
#else
|
|
||||||
"movd (%0,%1,1),%%xmm1 \n"
|
|
||||||
"movd (%0,%5,1),%%xmm6 \n"
|
|
||||||
#endif
|
|
||||||
"punpckldq %%xmm6,%%xmm1 \n"
|
"punpckldq %%xmm6,%%xmm1 \n"
|
||||||
"addps %%xmm4,%%xmm2 \n"
|
"addps %%xmm4,%%xmm2 \n"
|
||||||
"movq %%xmm1," MEMACCESS(2) " \n"
|
"movq %%xmm1," MEMACCESS(2) " \n"
|
||||||
#if defined(__x86_64__)
|
"movd %%xmm0,%k1 \n"
|
||||||
"movd %%xmm0,%1 \n"
|
|
||||||
"mov %1,%5 \n"
|
|
||||||
"and $0x0fffffff,%1 \n"
|
|
||||||
"shr $32,%5 \n"
|
|
||||||
#else
|
|
||||||
"movd %%xmm0,%1 \n"
|
|
||||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||||
"movd %%xmm0,%5 \n"
|
"movd %%xmm0,%k5 \n"
|
||||||
#endif
|
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
|
||||||
#if defined(__x86_64__) && defined(__native_client__)
|
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
|
||||||
BUNDLEALIGN
|
|
||||||
"lea (%q0,%q1,1),%%r14d \n"
|
|
||||||
"movd (%%r15,%%r14,1),%%xmm0 \n"
|
|
||||||
"lea (%q0,%q5,1),%%r14d \n"
|
|
||||||
"movd (%%r15,%%r14,1),%%xmm6 \n"
|
|
||||||
#else
|
|
||||||
"movd (%0,%1,1),%%xmm0 \n"
|
|
||||||
"movd (%0,%5,1),%%xmm6 \n"
|
|
||||||
#endif
|
|
||||||
"punpckldq %%xmm6,%%xmm0 \n"
|
"punpckldq %%xmm6,%%xmm0 \n"
|
||||||
"addps %%xmm4,%%xmm3 \n"
|
"addps %%xmm4,%%xmm3 \n"
|
||||||
"sub $0x4,%4 \n"
|
"sub $0x4,%4 \n"
|
||||||
@ -5331,17 +5295,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
|||||||
"packssdw %%xmm0,%%xmm0 \n"
|
"packssdw %%xmm0,%%xmm0 \n"
|
||||||
"pmaddwd %%xmm5,%%xmm0 \n"
|
"pmaddwd %%xmm5,%%xmm0 \n"
|
||||||
"addps %%xmm7,%%xmm2 \n"
|
"addps %%xmm7,%%xmm2 \n"
|
||||||
"movd %%xmm0,%1 \n"
|
"movd %%xmm0,%k1 \n"
|
||||||
#if defined(__x86_64__)
|
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
|
||||||
"and $0x0fffffff,%1 \n"
|
|
||||||
#endif
|
|
||||||
#if defined(__x86_64__) && defined(__native_client__)
|
|
||||||
BUNDLEALIGN
|
|
||||||
"lea (%q0,%q1,1),%%r14d \n"
|
|
||||||
"movd (%%r15,%%r14,1),%%xmm0 \n"
|
|
||||||
#else
|
|
||||||
"movd (%0,%1,1),%%xmm0 \n"
|
|
||||||
#endif
|
|
||||||
"sub $0x1,%4 \n"
|
"sub $0x1,%4 \n"
|
||||||
"movd %%xmm0," MEMACCESS(2) " \n"
|
"movd %%xmm0," MEMACCESS(2) " \n"
|
||||||
"lea " MEMLEA(0x04,2) ",%2 \n"
|
"lea " MEMLEA(0x04,2) ",%2 \n"
|
||||||
|
|||||||
@ -218,15 +218,15 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
|||||||
TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2)
|
TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2)
|
||||||
|
|
||||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
// TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
// TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||||
#undef TEST_FACTOR1
|
#undef TEST_FACTOR1
|
||||||
#undef TEST_FACTOR
|
#undef TEST_FACTOR
|
||||||
|
|
||||||
@ -262,11 +262,11 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
|||||||
TEST_SCALETO1(name, width, height, Linear, 2) \
|
TEST_SCALETO1(name, width, height, Linear, 2) \
|
||||||
TEST_SCALETO1(name, width, height, Bilinear, 2)
|
TEST_SCALETO1(name, width, height, Bilinear, 2)
|
||||||
|
|
||||||
TEST_SCALETO(ARGBScale, 640, 360)
|
TEST_SCALETO(DISABLED_ARGBScale, 640, 360)
|
||||||
TEST_SCALETO(ARGBScale, 853, 480)
|
TEST_SCALETO(DISABLED_ARGBScale, 853, 480)
|
||||||
TEST_SCALETO(ARGBScale, 1280, 720)
|
TEST_SCALETO(ARGBScale, 1280, 720)
|
||||||
TEST_SCALETO(ARGBScale, 1280, 800)
|
TEST_SCALETO(DISABLED_ARGBScale, 1280, 800)
|
||||||
TEST_SCALETO(ARGBScale, 1366, 768)
|
TEST_SCALETO(DISABLED_ARGBScale, 1366, 768)
|
||||||
TEST_SCALETO(ARGBScale, 1920, 1080)
|
TEST_SCALETO(ARGBScale, 1920, 1080)
|
||||||
#undef TEST_SCALETO1
|
#undef TEST_SCALETO1
|
||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|||||||
@ -150,15 +150,15 @@ static int TestFilter(int src_width, int src_height,
|
|||||||
TEST_FACTOR1(name, Box, hfactor, vfactor, 3) \
|
TEST_FACTOR1(name, Box, hfactor, vfactor, 3) \
|
||||||
|
|
||||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
// TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
// TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||||
#undef TEST_FACTOR1
|
#undef TEST_FACTOR1
|
||||||
#undef TEST_FACTOR
|
#undef TEST_FACTOR
|
||||||
|
|
||||||
@ -183,12 +183,12 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
|||||||
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
||||||
TEST_SCALETO1(name, width, height, Box, 3)
|
TEST_SCALETO1(name, width, height, Box, 3)
|
||||||
|
|
||||||
TEST_SCALETO(Scale, 640, 360)
|
TEST_SCALETO(DISABLED_Scale, 640, 360)
|
||||||
TEST_SCALETO(Scale, 853, 480)
|
TEST_SCALETO(DISABLED_Scale, 853, 480)
|
||||||
TEST_SCALETO(Scale, 1280, 720)
|
TEST_SCALETO(Scale, 1280, 720)
|
||||||
TEST_SCALETO(Scale, 1280, 800)
|
TEST_SCALETO(DISABLED_Scale, 1280, 800)
|
||||||
TEST_SCALETO(Scale, 1366, 768)
|
TEST_SCALETO(DISABLED_Scale, 1366, 768)
|
||||||
TEST_SCALETO(Scale, 1920, 1080)
|
TEST_SCALETO(DISABLED_Scale, 1920, 1080)
|
||||||
#undef TEST_SCALETO1
|
#undef TEST_SCALETO1
|
||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user