mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
InterpolateRow_AVX2: use AVX2 loads/stores instead of ERMS (rep movsb) for the 100% blend (row copy) case
Bug: b/210066781
Change-Id: I709e403f03bd6b9f8fe693b165b242b784076fe0
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3329072
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
fdc71956bd
commit
78625492cb
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1806
|
Version: 1807
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1806
|
#define LIBYUV_VERSION 1807
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
@ -8124,7 +8124,7 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb,
|
|||||||
void InterpolateRow_SSSE3(uint8_t* dst_ptr,
|
void InterpolateRow_SSSE3(uint8_t* dst_ptr,
|
||||||
const uint8_t* src_ptr,
|
const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int dst_width,
|
int width,
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"sub %1,%0 \n"
|
"sub %1,%0 \n"
|
||||||
@ -8193,7 +8193,7 @@ void InterpolateRow_SSSE3(uint8_t* dst_ptr,
|
|||||||
"99: \n"
|
"99: \n"
|
||||||
: "+r"(dst_ptr), // %0
|
: "+r"(dst_ptr), // %0
|
||||||
"+r"(src_ptr), // %1
|
"+r"(src_ptr), // %1
|
||||||
"+rm"(dst_width), // %2
|
"+rm"(width), // %2
|
||||||
"+r"(source_y_fraction) // %3
|
"+r"(source_y_fraction) // %3
|
||||||
: "r"((intptr_t)(src_stride)) // %4
|
: "r"((intptr_t)(src_stride)) // %4
|
||||||
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
@ -8205,13 +8205,12 @@ void InterpolateRow_SSSE3(uint8_t* dst_ptr,
|
|||||||
void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
||||||
const uint8_t* src_ptr,
|
const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int dst_width,
|
int width,
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
size_t width_tmp = (size_t)(dst_width);
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
"sub %1,%0 \n"
|
||||||
"cmp $0x0,%3 \n"
|
"cmp $0x0,%3 \n"
|
||||||
"je 100f \n"
|
"je 100f \n"
|
||||||
"sub %1,%0 \n"
|
|
||||||
"cmp $0x80,%3 \n"
|
"cmp $0x80,%3 \n"
|
||||||
"je 50f \n"
|
"je 50f \n"
|
||||||
|
|
||||||
@ -8262,15 +8261,17 @@ void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
|||||||
// Blend 100 / 0 - Copy row unchanged.
|
// Blend 100 / 0 - Copy row unchanged.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"100: \n"
|
"100: \n"
|
||||||
"rep movsb \n"
|
"vmovdqu (%1),%%ymm0 \n"
|
||||||
"jmp 999f \n"
|
"vmovdqu %%ymm0,0x00(%1,%0,1) \n"
|
||||||
|
"lea 0x20(%1),%1 \n"
|
||||||
|
"sub $0x20,%2 \n"
|
||||||
|
"jg 100b \n"
|
||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
"999: \n"
|
: "+r"(dst_ptr), // %0
|
||||||
: "+D"(dst_ptr), // %0
|
"+r"(src_ptr), // %1
|
||||||
"+S"(src_ptr), // %1
|
"+r"(width), // %2
|
||||||
"+c"(width_tmp), // %2
|
|
||||||
"+r"(source_y_fraction) // %3
|
"+r"(source_y_fraction) // %3
|
||||||
: "r"((intptr_t)(src_stride)) // %4
|
: "r"((intptr_t)(src_stride)) // %4
|
||||||
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5");
|
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user