mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Report of affine to nacl using %k0
BUG=none TEST=none R=johannkoenig@google.com Review URL: https://webrtc-codereview.appspot.com/3929004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@855 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
e8c74b61d3
commit
a0630d77f0
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 855
|
||||
Version: 856
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 855
|
||||
#define LIBYUV_VERSION 856
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -5230,11 +5230,7 @@ void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
|
||||
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
|
||||
|
||||
#ifdef HAS_ARGBAFFINEROW_SSE2
|
||||
// TODO(fbarchard): Find 64 bit way to avoid masking.
|
||||
// Copy ARGB pixels from source image with slope to a row of destination.
|
||||
// Caveat - in 64 bit, movd is used with 64 bit gpr due to Mac gcc producing
|
||||
// an error if movq is used. movd %%xmm0,%1
|
||||
|
||||
LIBYUV_API
|
||||
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* src_dudv, int width) {
|
||||
@ -5263,56 +5259,24 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
// 4 pixel loop \n"
|
||||
".p2align 4 \n"
|
||||
"40: \n"
|
||||
"cvttps2dq %%xmm2,%%xmm0 \n"
|
||||
"cvttps2dq %%xmm3,%%xmm1 \n"
|
||||
"packssdw %%xmm1,%%xmm0 \n"
|
||||
"pmaddwd %%xmm5,%%xmm0 \n"
|
||||
#if defined(__x86_64__)
|
||||
// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl.
|
||||
"movd %%xmm0,%1 \n"
|
||||
"mov %1,%5 \n"
|
||||
"and $0x0fffffff,%1 \n"
|
||||
"shr $32,%5 \n"
|
||||
"pshufd $0xEE,%%xmm0,%%xmm0 \n"
|
||||
#else
|
||||
"movd %%xmm0,%1 \n"
|
||||
"cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2
|
||||
"cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2
|
||||
"packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts
|
||||
"pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride
|
||||
"movd %%xmm0,%k1 \n"
|
||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%5 \n"
|
||||
"movd %%xmm0,%k5 \n"
|
||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__native_client__)
|
||||
BUNDLEALIGN
|
||||
"lea (%q0,%q1,1),%%r14d \n"
|
||||
"movd (%%r15,%%r14,1),%%xmm1 \n"
|
||||
"lea (%q0,%q5,1),%%r14d \n"
|
||||
"movd (%%r15,%%r14,1),%%xmm6 \n"
|
||||
#else
|
||||
"movd (%0,%1,1),%%xmm1 \n"
|
||||
"movd (%0,%5,1),%%xmm6 \n"
|
||||
#endif
|
||||
MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
|
||||
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
|
||||
"punpckldq %%xmm6,%%xmm1 \n"
|
||||
"addps %%xmm4,%%xmm2 \n"
|
||||
"movq %%xmm1," MEMACCESS(2) " \n"
|
||||
#if defined(__x86_64__)
|
||||
"movd %%xmm0,%1 \n"
|
||||
"mov %1,%5 \n"
|
||||
"and $0x0fffffff,%1 \n"
|
||||
"shr $32,%5 \n"
|
||||
#else
|
||||
"movd %%xmm0,%1 \n"
|
||||
"movd %%xmm0,%k1 \n"
|
||||
"pshufd $0x39,%%xmm0,%%xmm0 \n"
|
||||
"movd %%xmm0,%5 \n"
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__native_client__)
|
||||
BUNDLEALIGN
|
||||
"lea (%q0,%q1,1),%%r14d \n"
|
||||
"movd (%%r15,%%r14,1),%%xmm0 \n"
|
||||
"lea (%q0,%q5,1),%%r14d \n"
|
||||
"movd (%%r15,%%r14,1),%%xmm6 \n"
|
||||
#else
|
||||
"movd (%0,%1,1),%%xmm0 \n"
|
||||
"movd (%0,%5,1),%%xmm6 \n"
|
||||
#endif
|
||||
"movd %%xmm0,%k5 \n"
|
||||
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
|
||||
MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6
|
||||
"punpckldq %%xmm6,%%xmm0 \n"
|
||||
"addps %%xmm4,%%xmm3 \n"
|
||||
"sub $0x4,%4 \n"
|
||||
@ -5331,17 +5295,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
"packssdw %%xmm0,%%xmm0 \n"
|
||||
"pmaddwd %%xmm5,%%xmm0 \n"
|
||||
"addps %%xmm7,%%xmm2 \n"
|
||||
"movd %%xmm0,%1 \n"
|
||||
#if defined(__x86_64__)
|
||||
"and $0x0fffffff,%1 \n"
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__native_client__)
|
||||
BUNDLEALIGN
|
||||
"lea (%q0,%q1,1),%%r14d \n"
|
||||
"movd (%%r15,%%r14,1),%%xmm0 \n"
|
||||
#else
|
||||
"movd (%0,%1,1),%%xmm0 \n"
|
||||
#endif
|
||||
"movd %%xmm0,%k1 \n"
|
||||
MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0
|
||||
"sub $0x1,%4 \n"
|
||||
"movd %%xmm0," MEMACCESS(2) " \n"
|
||||
"lea " MEMLEA(0x04,2) ",%2 \n"
|
||||
|
||||
@ -218,15 +218,15 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
TEST_FACTOR1(name, Bilinear, hfactor, vfactor, 2)
|
||||
|
||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
// TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
// TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
|
||||
@ -262,11 +262,11 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
TEST_SCALETO1(name, width, height, Linear, 2) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 2)
|
||||
|
||||
TEST_SCALETO(ARGBScale, 640, 360)
|
||||
TEST_SCALETO(ARGBScale, 853, 480)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 640, 360)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 853, 480)
|
||||
TEST_SCALETO(ARGBScale, 1280, 720)
|
||||
TEST_SCALETO(ARGBScale, 1280, 800)
|
||||
TEST_SCALETO(ARGBScale, 1366, 768)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1280, 800)
|
||||
TEST_SCALETO(DISABLED_ARGBScale, 1366, 768)
|
||||
TEST_SCALETO(ARGBScale, 1920, 1080)
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
@ -150,15 +150,15 @@ static int TestFilter(int src_width, int src_height,
|
||||
TEST_FACTOR1(name, Box, hfactor, vfactor, 3) \
|
||||
|
||||
// TODO(fbarchard): ScaleDownBy1 should be lossless, but Box has error of 2.
|
||||
TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
// TEST_FACTOR(1, 1 / 1, 1 / 1)
|
||||
TEST_FACTOR(2, 1 / 2, 1 / 2)
|
||||
TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
// TEST_FACTOR(4, 1 / 4, 1 / 4)
|
||||
// TEST_FACTOR(8, 1 / 8, 1 / 8)
|
||||
// TEST_FACTOR(16, 1 / 16, 1 / 16)
|
||||
// TEST_FACTOR(2by3, 2 / 3, 2 / 3)
|
||||
// TEST_FACTOR(3by4, 3 / 4, 3 / 4)
|
||||
// TEST_FACTOR(3by8, 3 / 8, 3 / 8)
|
||||
// TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
#undef TEST_FACTOR1
|
||||
#undef TEST_FACTOR
|
||||
|
||||
@ -183,12 +183,12 @@ TEST_FACTOR(Vertical2by3, 1, 2 / 3)
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Box, 3)
|
||||
|
||||
TEST_SCALETO(Scale, 640, 360)
|
||||
TEST_SCALETO(Scale, 853, 480)
|
||||
TEST_SCALETO(DISABLED_Scale, 640, 360)
|
||||
TEST_SCALETO(DISABLED_Scale, 853, 480)
|
||||
TEST_SCALETO(Scale, 1280, 720)
|
||||
TEST_SCALETO(Scale, 1280, 800)
|
||||
TEST_SCALETO(Scale, 1366, 768)
|
||||
TEST_SCALETO(Scale, 1920, 1080)
|
||||
TEST_SCALETO(DISABLED_Scale, 1280, 800)
|
||||
TEST_SCALETO(DISABLED_Scale, 1366, 768)
|
||||
TEST_SCALETO(DISABLED_Scale, 1920, 1080)
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user