InterpolateRow for Visual C

- remove InterpolateRow_SSSE3
- optimize ARGBToUV444MatrixRow_AVX2 to use unsigned pixels

5.7x faster on AMD Zen4

Was C
TestInterpolatePlane (144 ms)
TestInterpolatePlane_16 (142 ms)

Now AVX2
TestInterpolatePlane (25 ms)
TestInterpolatePlane_16 (48 ms)

Was signed
ARGBToJ444_Opt (157 ms)
Now unsigned
ARGBToJ444_Opt (155 ms)

Bug: None
Change-Id: I903109668ff9cfedaddad1ad75411393b3226f41
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7856498
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2026-05-18 16:52:12 -07:00
parent 9f751100d2
commit 9d98aaefe7
12 changed files with 113 additions and 249 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1940
Version: 1941
Revision: DEPS
License: BSD-3-Clause
License File: LICENSE

View File

@ -60,7 +60,6 @@ extern "C" {
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_I444TORGB24ROW_SSSE3
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_J400TOARGBROW_SSE2
#define HAS_J422TOARGBROW_SSSE3
#define HAS_MERGEUVROW_SSE2
@ -149,6 +148,8 @@ extern "C" {
#define HAS_MIRRORROW_AVX2
#define HAS_MIRRORSPLITUVROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_INTERPOLATEROW_16_AVX2
#define HAS_INTERPOLATEROW_AVX2
#endif
#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \
@ -172,7 +173,6 @@ extern "C" {
#define HAS_I422TORGBAROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_I444TORGB24ROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_MIRRORSPLITUVROW_AVX2
@ -323,7 +323,6 @@ extern "C" {
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_INTERPOLATEROW_16TO8_AVX2
#define HAS_INTERPOLATEROW_16_AVX2
#define HAS_MERGEAR64ROW_AVX2
#define HAS_MERGEARGB16TO8ROW_AVX2
#define HAS_MERGEARGBROW_AVX2
@ -6645,11 +6644,6 @@ void InterpolateRow_C(uint8_t* dst_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction);
void InterpolateRow_SSSE3(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
int dst_width,
int source_y_fraction);
void InterpolateRow_AVX2(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
@ -6680,11 +6674,6 @@ void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,
int width,
int source_y_fraction);
void InterpolateRow_Any_AVX2(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride_ptr,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1940
#define LIBYUV_VERSION 1941
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -1204,14 +1204,6 @@ int I422ToNV21(const uint8_t* src_y,
MergeUVRow = MergeUVRow_RVV;
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;

View File

@ -4332,14 +4332,6 @@ int InterpolatePlane(const uint8_t* src0,
height = 1;
src_stride0 = src_stride1 = dst_stride = 0;
}
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -5535,14 +5527,6 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;

View File

@ -1830,18 +1830,9 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
memcpy(dst_ptr + np * BPP, vout, r * BPP * sizeof(TD)); \
}
#ifdef HAS_INTERPOLATEROW_AVX2
#if defined(HAS_INTERPOLATEROW_AVX2)
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11I(InterpolateRow_Any_SSSE3,
InterpolateRow_SSSE3,
uint8_t,
uint8_t,
1,
1,
15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15)
#endif

View File

@ -8752,87 +8752,6 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb,
}
#endif // HAS_ARGBAFFINEROW_SSE2
#ifdef HAS_INTERPOLATEROW_SSSE3
// Bilinear filter 16x2 -> 16x1
// Blends the row at src_ptr with the row at src_ptr + src_stride, 16 bytes
// per loop iteration, and stores the result at dst_ptr.
// source_y_fraction selects one of three loops:
//   0    - label 100: copy the first row unchanged.
//   0x80 - label 50: pavgb of the two rows.
//   else - label 1: (row0 * (256 - f) + row1 * f + 128) >> 8 per byte.
// Every loop moves whole 16 byte vectors and counts width down by 16.
void InterpolateRow_SSSE3(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
int width,
int source_y_fraction) {
asm volatile(
// Turn dst_ptr into an offset from src_ptr so stores can use (%1,%0,1).
"sub %1,%0 \n"
"cmp $0x0,%3 \n"
"je 100f \n"
"cmp $0x80,%3 \n"
"je 50f \n"
// Build the weights: xmm5 = byte pairs (256 - f, f) broadcast to all lanes.
"movd %3,%%xmm0 \n"
"neg %3 \n"
"add $0x100,%3 \n"
"movd %3,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm5 \n"
"punpcklwd %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
// xmm4 = 0x80 in every byte; used as the signed bias and for rounding.
"mov $0x80808080,%%eax \n"
"movd %%eax,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
// General purpose row blend.
LABELALIGN
"1: \n"
"movdqu (%1),%%xmm0 \n"
"movdqu 0x00(%1,%4,1),%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
// Interleave row0/row1 bytes so each 16 bit lane holds one pixel pair.
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
// Subtract 0x80 so the pixels are signed bytes for pmaddubsw.
"psubb %%xmm4,%%xmm0 \n"
"psubb %%xmm4,%%xmm1 \n"
"movdqa %%xmm5,%%xmm2 \n"
"movdqa %%xmm5,%%xmm3 \n"
"pmaddubsw %%xmm0,%%xmm2 \n"
"pmaddubsw %%xmm1,%%xmm3 \n"
// Adding 0x8080 undoes the 0x80 * 256 bias and adds 0x80 for rounding.
"paddw %%xmm4,%%xmm2 \n"
"paddw %%xmm4,%%xmm3 \n"
"psrlw $0x8,%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqu %%xmm2,0x00(%1,%0,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
"jmp 99f \n"
// Blend 50 / 50.
LABELALIGN
"50: \n"
"movdqu (%1),%%xmm0 \n"
"movdqu 0x00(%1,%4,1),%%xmm1 \n"
"pavgb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0,0x00(%1,%0,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 50b \n"
"jmp 99f \n"
// Blend 100 / 0 - Copy row unchanged.
LABELALIGN
"100: \n"
"movdqu (%1),%%xmm0 \n"
"movdqu %%xmm0,0x00(%1,%0,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 100b \n"
"99: \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
"+rm"(width), // %2
"+r"(source_y_fraction) // %3
: "r"(src_stride) // %4
: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_INTERPOLATEROW_SSSE3
#ifdef HAS_INTERPOLATEROW_AVX2
// Bilinear filter 32x2 -> 32x1
void InterpolateRow_AVX2(uint8_t* dst_ptr,

View File

@ -121,19 +121,11 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
__m256i ymm5 = _mm256_set1_epi8((char)0x80);
__m256i ymm_u =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU));
__m256i ymm_v =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToV));
__m256i ymm_add =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kAddUV));
__m256i ymm_u_bias = _mm256_maddubs_epi16(ymm_u, ymm5);
ymm_u_bias = _mm256_hadd_epi16(ymm_u_bias, ymm_u_bias);
__m256i ymm_add_u = _mm256_sub_epi16(ymm_add, ymm_u_bias);
__m256i ymm_v_bias = _mm256_maddubs_epi16(ymm_v, ymm5);
ymm_v_bias = _mm256_hadd_epi16(ymm_v_bias, ymm_v_bias);
__m256i ymm_add_v = _mm256_sub_epi16(ymm_add, ymm_v_bias);
__m256i ymm5 = _mm256_set1_epi16((short)0x8000);
__m256i perm_mask = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
while (width > 0) {
@ -143,25 +135,15 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
__m256i ymm3 = _mm256_loadu_si256((const __m256i*)(src_argb + 96));
src_argb += 128;
__m256i ymm0_u = _mm256_sub_epi8(ymm0, ymm5);
__m256i ymm1_u = _mm256_sub_epi8(ymm1, ymm5);
__m256i ymm2_u = _mm256_sub_epi8(ymm2, ymm5);
__m256i ymm3_u = _mm256_sub_epi8(ymm3, ymm5);
__m256i ymm0_u = _mm256_maddubs_epi16(ymm0, ymm_u);
__m256i ymm1_u = _mm256_maddubs_epi16(ymm1, ymm_u);
__m256i ymm2_u = _mm256_maddubs_epi16(ymm2, ymm_u);
__m256i ymm3_u = _mm256_maddubs_epi16(ymm3, ymm_u);
__m256i ymm0_v = ymm0_u;
__m256i ymm1_v = ymm1_u;
__m256i ymm2_v = ymm2_u;
__m256i ymm3_v = ymm3_u;
ymm0_u = _mm256_maddubs_epi16(ymm_u, ymm0_u);
ymm1_u = _mm256_maddubs_epi16(ymm_u, ymm1_u);
ymm2_u = _mm256_maddubs_epi16(ymm_u, ymm2_u);
ymm3_u = _mm256_maddubs_epi16(ymm_u, ymm3_u);
ymm0_v = _mm256_maddubs_epi16(ymm_v, ymm0_v);
ymm1_v = _mm256_maddubs_epi16(ymm_v, ymm1_v);
ymm2_v = _mm256_maddubs_epi16(ymm_v, ymm2_v);
ymm3_v = _mm256_maddubs_epi16(ymm_v, ymm3_v);
__m256i ymm0_v = _mm256_maddubs_epi16(ymm0, ymm_v);
__m256i ymm1_v = _mm256_maddubs_epi16(ymm1, ymm_v);
__m256i ymm2_v = _mm256_maddubs_epi16(ymm2, ymm_v);
__m256i ymm3_v = _mm256_maddubs_epi16(ymm3, ymm_v);
ymm0_u = _mm256_hadd_epi16(ymm0_u, ymm1_u);
ymm2_u = _mm256_hadd_epi16(ymm2_u, ymm3_u);
@ -169,11 +151,11 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
ymm0_v = _mm256_hadd_epi16(ymm0_v, ymm1_v);
ymm2_v = _mm256_hadd_epi16(ymm2_v, ymm3_v);
ymm0_u = _mm256_add_epi16(ymm0_u, ymm_add_u);
ymm2_u = _mm256_add_epi16(ymm2_u, ymm_add_u);
ymm0_u = _mm256_sub_epi16(ymm5, ymm0_u);
ymm2_u = _mm256_sub_epi16(ymm5, ymm2_u);
ymm0_v = _mm256_add_epi16(ymm0_v, ymm_add_v);
ymm2_v = _mm256_add_epi16(ymm2_v, ymm_add_v);
ymm0_v = _mm256_sub_epi16(ymm5, ymm0_v);
ymm2_v = _mm256_sub_epi16(ymm5, ymm2_v);
ymm0_u = _mm256_srli_epi16(ymm0_u, 8);
ymm2_u = _mm256_srli_epi16(ymm2_u, 8);
@ -576,6 +558,101 @@ void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
}
#endif
#ifdef HAS_INTERPOLATEROW_AVX2
// Bilinear filter 32x2 -> 32x1
// Blends the row at src_ptr with the row at src_ptr + src_stride and writes
// the result to dst_ptr, 32 bytes per iteration. With f = source_y_fraction:
//   f == 0:    dst[i] = row0[i]                      (copy)
//   f == 128:  dst[i] = (row0[i] + row1[i] + 1) >> 1 (_mm256_avg_epu8)
//   otherwise: dst[i] = (row0[i] * (256 - f) + row1[i] * f + 128) >> 8
// Every iteration loads and stores a full 32 byte vector, so a width that is
// not a multiple of 32 is processed as if rounded up to the next multiple.
LIBYUV_TARGET_AVX2
void InterpolateRow_AVX2(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1 = source_y_fraction;
  int y0 = 256 - y1;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  // Each 16 bit lane holds the weight pair: y0 in the low byte, y1 in the
  // high byte. The values exceed SHRT_MAX, so cast explicitly instead of
  // relying on an implicit int to short narrowing of the argument.
  __m256i ymm_y = _mm256_set1_epi16((short)((y1 << 8) | y0));
  // 0x80 in every byte: pixel bias for the signed multiply. Read as 16 bit
  // lanes it is 0x8080, which removes that bias again and adds rounding.
  __m256i ymm_8080 = _mm256_set1_epi16((short)0x8080);
  int i;

  if (y1 == 0) {
    // Blend 100 / 0 - copy row unchanged.
    for (i = 0; i < width; i += 32) {
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_loadu_si256((const __m256i*)(src_ptr + i)));
    }
  } else if (y1 == 128) {
    // Blend 50 / 50.
    for (i = 0; i < width; i += 32) {
      __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
      __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_avg_epu8(row0, row1));
    }
  } else {
    // General purpose row blend.
    for (i = 0; i < width; i += 32) {
      __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
      __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
      // Interleave so each 16 bit lane holds (row0 byte, row1 byte). The
      // unpack here and the pack below both work per 128 bit half, so the
      // byte order of the output matches the input.
      __m256i low = _mm256_unpacklo_epi8(row0, row1);
      __m256i high = _mm256_unpackhi_epi8(row0, row1);
      // maddubs multiplies unsigned bytes (weights) by signed bytes, so
      // shift the pixels from 0..255 to -128..127 first.
      low = _mm256_sub_epi8(low, ymm_8080);
      high = _mm256_sub_epi8(high, ymm_8080);
      low = _mm256_maddubs_epi16(ymm_y, low);
      high = _mm256_maddubs_epi16(ymm_y, high);
      // The bias removed 128 * (y0 + y1) = 0x8000 from each sum. Adding
      // 0x8080 restores it and adds 0x80 so the shift below rounds.
      low = _mm256_add_epi16(low, ymm_8080);
      high = _mm256_add_epi16(high, ymm_8080);
      low = _mm256_srli_epi16(low, 8);
      high = _mm256_srli_epi16(high, 8);
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_packus_epi16(low, high));
    }
  }
  _mm256_zeroupper();
}
#endif
#ifdef HAS_INTERPOLATEROW_16_AVX2
// Bilinear filter 16x2 -> 16x1 for 16 bit samples.
// Blends the row at src_ptr with the row src_stride elements after it and
// writes 16 samples per iteration to dst_ptr. With f = source_y_fraction:
//   f == 0:    dst[i] = row0[i]                      (copy)
//   f == 128:  dst[i] = (row0[i] + row1[i] + 1) >> 1 (_mm256_avg_epu16)
//   otherwise: dst[i] = (row0[i] * (256 - f) + row1[i] * f + 128) >> 8
// width and src_stride are counted in uint16_t elements. Every iteration
// loads and stores a full vector of 16 samples, so a width that is not a
// multiple of 16 is processed as if rounded up to the next multiple.
LIBYUV_TARGET_AVX2
void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            ptrdiff_t src_stride,
                            int width,
                            int source_y_fraction) {
  const int frac1 = source_y_fraction;
  const int frac0 = 256 - frac1;
  const uint16_t* const below = src_ptr + src_stride;
  // Each 32 bit lane holds the weight pair: frac0 low word, frac1 high word.
  const __m256i weights = _mm256_set1_epi32((frac1 << 16) | frac0);
  // Shifts samples from 0..65535 to -32768..32767 for the signed multiply.
  const __m256i sign_bias = _mm256_set1_epi16((short)0x8000);
  // Undoes the bias (0x8000 * 256 = 0x800000) and adds 128 for rounding.
  const __m256i unbias_round = _mm256_set1_epi32(8388736);
  int x = 0;

  if (frac1 == 0) {
    // Blend 100 / 0 - copy row unchanged.
    while (x < width) {
      const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
      _mm256_storeu_si256((__m256i*)(dst_ptr + x), top);
      x += 16;
    }
  } else if (frac1 == 128) {
    // Blend 50 / 50.
    while (x < width) {
      const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
      const __m256i bottom = _mm256_loadu_si256((const __m256i*)(below + x));
      _mm256_storeu_si256((__m256i*)(dst_ptr + x),
                          _mm256_avg_epu16(top, bottom));
      x += 16;
    }
  } else {
    // General purpose row blend.
    while (x < width) {
      const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
      const __m256i bottom = _mm256_loadu_si256((const __m256i*)(below + x));
      // Interleave into (top, bottom) word pairs and apply the sign bias.
      const __m256i pairs_lo =
          _mm256_sub_epi16(_mm256_unpacklo_epi16(top, bottom), sign_bias);
      const __m256i pairs_hi =
          _mm256_sub_epi16(_mm256_unpackhi_epi16(top, bottom), sign_bias);
      // Weighted sum per pair, then remove the bias and add rounding.
      __m256i sum_lo =
          _mm256_add_epi32(_mm256_madd_epi16(pairs_lo, weights), unbias_round);
      __m256i sum_hi =
          _mm256_add_epi32(_mm256_madd_epi16(pairs_hi, weights), unbias_round);
      sum_lo = _mm256_srai_epi32(sum_lo, 8);
      sum_hi = _mm256_srai_epi32(sum_hi, 8);
      // The pack works per 128 bit half like the unpacks, restoring order.
      _mm256_storeu_si256((__m256i*)(dst_ptr + x),
                          _mm256_packus_epi32(sum_lo, sum_hi));
      x += 16;
    }
  }
  _mm256_zeroupper();
}
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
LIBYUV_TARGET_AVX2
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {

View File

@ -1076,14 +1076,6 @@ static int ScalePlaneBilinearDown(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1196,14 +1188,6 @@ static int ScalePlaneBilinearDown_16(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;
@ -1290,14 +1274,6 @@ static int ScalePlaneBilinearUp(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1779,14 +1755,6 @@ static int ScalePlaneBilinearUp_16(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;

View File

@ -331,14 +331,6 @@ static int ScaleARGBBilinearDown(int src_width,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -454,14 +446,6 @@ static int ScaleARGBBilinearUp(int src_width,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -706,14 +690,6 @@ static int ScaleYUVToARGBBilinearUp(int src_width,
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;

View File

@ -1636,14 +1636,6 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1718,14 +1710,6 @@ void ScalePlaneVertical_16(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;

View File

@ -363,14 +363,6 @@ static int ScaleUVBilinearDown(int src_width,
clip_src_width = (int)(xr - xl) * 2; // Width aligned to 2.
src_uv += xl * 2;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -473,14 +465,6 @@ static int ScaleUVBilinearUp(int src_width,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;