mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Make AddRows handle a height of 1 using a more general while-style loop.
BUG=none
TESTED=try bots
Review URL: https://webrtc-codereview.appspot.com/45999004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1366 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
c7161d1c36
commit
b5ea79d845
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1365
|
||||
Version: 1366
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1365
|
||||
#define LIBYUV_VERSION 1366
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -745,8 +745,8 @@ static void ScalePlaneBox(int src_width, int src_height,
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
|
||||
&x, &y, &dx, &dy);
|
||||
src_width = Abs(src_width);
|
||||
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
|
||||
if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
|
||||
// TODO(fbarchard): Remove this and make AddRows handle odd width.
|
||||
if (!IS_ALIGNED(src_width, 16)) {
|
||||
uint8* dst = dst_ptr;
|
||||
int j;
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
|
||||
@ -573,44 +573,38 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
// Reads 16xN bytes and produces 16 shorts at a time.
|
||||
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
int tmp_height = 0;
|
||||
intptr_t tmp_src = 0;
|
||||
asm volatile (
|
||||
"pxor %%xmm4,%%xmm4 \n"
|
||||
"sub $0x1,%5 \n"
|
||||
"mov %0,%3 \n" // row pointer
|
||||
"mov %5,%2 \n" // height
|
||||
"pxor %%xmm0,%%xmm0 \n" // clear accumulators
|
||||
"pxor %%xmm1,%%xmm1 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"mov %0,%3 \n"
|
||||
"add %6,%0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm4,%%xmm0 \n"
|
||||
"punpckhbw %%xmm4,%%xmm1 \n"
|
||||
"mov %5,%2 \n"
|
||||
"test %2,%2 \n"
|
||||
"je 3f \n"
|
||||
|
||||
LABELALIGN
|
||||
"2: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm2 \n"
|
||||
"add %6,%0 \n"
|
||||
"movdqu " MEMACCESS(3) ",%%xmm2 \n"
|
||||
"add %6,%3 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
"punpckhbw %%xmm4,%%xmm3 \n"
|
||||
"paddusw %%xmm2,%%xmm0 \n"
|
||||
"paddusw %%xmm3,%%xmm1 \n"
|
||||
"sub $0x1,%2 \n"
|
||||
"jg 2b \n"
|
||||
"jg 1b \n"
|
||||
|
||||
LABELALIGN
|
||||
"3: \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x10,3) ",%0 \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
|
||||
"mov %0,%3 \n" // row pointer
|
||||
"mov %5,%2 \n" // height
|
||||
"pxor %%xmm0,%%xmm0 \n" // clear accumulators
|
||||
"pxor %%xmm1,%%xmm1 \n"
|
||||
"sub $0x10,%4 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
@ -799,8 +793,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
// Reads 4 pixels at a time.
|
||||
// Alignment requirement: dst_argb 16 byte aligned.
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
int src_stepx, uint8* dst_argb, int dst_width) {
|
||||
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
|
||||
intptr_t src_stepx_x12 = 0;
|
||||
asm volatile (
|
||||
|
||||
@ -708,11 +708,9 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
}
|
||||
|
||||
// Reads 16xN bytes and produces 16 shorts at a time.
|
||||
// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.
|
||||
__declspec(naked)
|
||||
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width,
|
||||
int src_height) {
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -724,21 +722,14 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
mov ecx, [esp + 16 + 16] // dst_width
|
||||
mov ebx, [esp + 16 + 20] // height
|
||||
pxor xmm4, xmm4
|
||||
dec ebx
|
||||
mov eax, esi // row pointer
|
||||
mov ebp, ebx // height
|
||||
pxor xmm0, xmm0 // clear accumulators
|
||||
pxor xmm1, xmm1
|
||||
|
||||
xloop:
|
||||
// first row
|
||||
movdqu xmm0, [esi]
|
||||
lea eax, [esi + edx]
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, xmm4
|
||||
punpckhbw xmm1, xmm4
|
||||
lea esi, [esi + 16]
|
||||
mov ebp, ebx
|
||||
test ebp, ebp
|
||||
je ydone
|
||||
|
||||
// sum remaining rows
|
||||
// sum rows
|
||||
yloop:
|
||||
movdqu xmm2, [eax] // read 16 pixels
|
||||
lea eax, [eax + edx] // advance to next row
|
||||
@ -750,11 +741,14 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
sub ebp, 1
|
||||
jg yloop
|
||||
|
||||
ydone:
|
||||
movdqu [edi], xmm0
|
||||
movdqu [edi + 16], xmm1
|
||||
lea edi, [edi + 32]
|
||||
|
||||
lea edi, [edi + 32] // dst_ptr += 16
|
||||
lea esi, [esi + 16] // src_ptr += 16
|
||||
mov eax, esi // row pointer
|
||||
mov ebp, ebx // height
|
||||
pxor xmm0, xmm0 // clear accumulators
|
||||
pxor xmm1, xmm1
|
||||
sub ecx, 16
|
||||
jg xloop
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user