diff --git a/README.chromium b/README.chromium index 9e5f62ae9..9121b6b35 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1134 +Version: 1135 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 9bb923db9..36226cc53 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -201,8 +201,7 @@ extern "C" { #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_I422TOARGBROW_AVX2 -// TODO(fbarchard): fix bug #376. -// #define HAS_INTERPOLATEROW_AVX2 +#define HAS_INTERPOLATEROW_AVX2 #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 #define HAS_SPLITUVROW_AVX2 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 8d04e0da8..5a3a9ce9f 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1134 +#define LIBYUV_VERSION 1135 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_any.cc b/source/row_any.cc index b1ede4e92..5bb84f720 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -581,24 +581,20 @@ YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, } #ifdef HAS_INTERPOLATEROW_AVX2 -NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, - InterpolateRow_C, 1, 1, 32) +NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, InterpolateRow_C, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_SSSE3 -NANY(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, - InterpolateRow_C, 1, 1, 15) +NANY(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, InterpolateRow_C, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_SSE2 -NANY(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, - InterpolateRow_C, 1, 1, 15) +NANY(InterpolateRow_Any_SSE2, InterpolateRow_SSE2, InterpolateRow_C, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_NEON -NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON, - InterpolateRow_C, 1, 1, 15) +NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON, InterpolateRow_C, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 -NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, - InterpolateRow_C, 1, 1, 3) +NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, InterpolateRow_C, + 1, 1, 3) #endif #undef NANY diff --git a/source/row_win.cc b/source/row_win.cc index bc3915d9a..20bc78f80 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -4972,11 +4972,11 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, #endif // HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_INTERPOLATEROW_AVX2 -// Bilinear filter 16x2 -> 16x1 +// Bilinear filter 32x2 -> 32x1 __declspec(naked) __declspec(align(16)) void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) { __asm { push esi push edi @@ -5023,45 +5023,48 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, jg xloop jmp xloop99 - // Blend 25 / 75. - align 4 - xloop25: - vmovdqu ymm0, [esi] - vpavgb ymm0, ymm0, [esi + edx] - vpavgb ymm0, ymm0, [esi + edx] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop25 - jmp xloop99 + // Blend 25 / 75. + align 4 + xloop25: + vmovdqu ymm0, [esi] + vmovdqu ymm1, [esi + edx] + vpavgb ymm0, ymm0, ymm1 + vpavgb ymm0, ymm0, ymm1 + sub ecx, 32 + vmovdqu [esi + edi], ymm0 + lea esi, [esi + 32] + jg xloop25 + jmp xloop99 - // Blend 50 / 50. - align 4 - xloop50: - vmovdqu ymm0, [esi] - vpavgb ymm0, ymm0, [esi + edx] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop50 - jmp xloop99 + // Blend 50 / 50. + align 4 + xloop50: + vmovdqu ymm0, [esi] + vmovdqu ymm1, [esi + edx] + vpavgb ymm0, ymm0, ymm1 + sub ecx, 32 + vmovdqu [esi + edi], ymm0 + lea esi, [esi + 32] + jg xloop50 + jmp xloop99 - // Blend 75 / 25. - align 4 - xloop75: - vmovdqu ymm0, [esi + edx] - vpavgb ymm0, ymm0, [esi] - vpavgb ymm0, ymm0, [esi] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop75 - jmp xloop99 + // Blend 75 / 25. + align 4 + xloop75: + vmovdqu ymm1, [esi] + vmovdqu ymm0, [esi + edx] + vpavgb ymm0, ymm0, ymm1 + vpavgb ymm0, ymm0, ymm1 + sub ecx, 32 + vmovdqu [esi + edi], ymm0 + lea esi, [esi + 32] + jg xloop75 + jmp xloop99 - // Blend 100 / 0 - Copy row unchanged. - align 4 - xloop100: - rep movsb + // Blend 100 / 0 - Copy row unchanged. + align 4 + xloop100: + rep movsb xloop99: pop edi