Fix RotatePlane by 90 degrees on NEON when source width is not a multiple of 8

Bug: b/220888716, b/218875554, b/220205245
Change-Id: I17e118ac9b9a7013386a5f0ad27a2dd249474ae5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3483576
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2022-02-23 00:46:55 -08:00 committed by libyuv LUCI CQ
parent 3b8c86d23a
commit e77531f6f1
9 changed files with 128 additions and 136 deletions

View File

@ -1864,7 +1864,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
int width, int width,
int height); int height);
// Convert I422 to RGBA with matrix. // Convert I420 to RGBA with matrix.
LIBYUV_API LIBYUV_API
int I420ToRGBAMatrix(const uint8_t* src_y, int I420ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y, int src_stride_y,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1810 #define LIBYUV_VERSION 1811
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -29,10 +29,7 @@ void TransposePlane(const uint8_t* src,
int width, int width,
int height) { int height) {
int i = height; int i = height;
#if defined(HAS_TRANSPOSEWX16_MSA) #if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C;
#elif defined(HAS_TRANSPOSEWX16_LSX)
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst, void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C; int dst_stride, int width) = TransposeWx16_C;
#else #else
@ -40,25 +37,13 @@ void TransposePlane(const uint8_t* src,
int dst_stride, int width) = TransposeWx8_C; int dst_stride, int width) = TransposeWx8_C;
#endif #endif
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_MSA;
}
}
#elif defined(HAS_TRANSPOSEWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeWx16 = TransposeWx16_Any_LSX;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_LSX;
}
}
#else
#if defined(HAS_TRANSPOSEWX8_NEON) #if defined(HAS_TRANSPOSEWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_Any_NEON;
if (IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_NEON; TransposeWx8 = TransposeWx8_NEON;
} }
}
#endif #endif
#if defined(HAS_TRANSPOSEWX8_SSSE3) #if defined(HAS_TRANSPOSEWX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
@ -76,17 +61,24 @@ void TransposePlane(const uint8_t* src,
} }
} }
#endif #endif
#endif /* defined(HAS_TRANSPOSEWX16_MSA) */
#if defined(HAS_TRANSPOSEWX16_MSA) #if defined(HAS_TRANSPOSEWX16_MSA)
// Work across the source in 16x16 tiles if (TestCpuFlag(kCpuHasMSA)) {
while (i >= 16) { TransposeWx16 = TransposeWx16_Any_MSA;
TransposeWx16(src, src_stride, dst, dst_stride, width); if (IS_ALIGNED(width, 16)) {
src += 16 * src_stride; // Go down 16 rows. TransposeWx16 = TransposeWx16_MSA;
dst += 16; // Move over 16 columns.
i -= 16;
} }
#elif defined(HAS_TRANSPOSEWX16_LSX) }
#endif
#if defined(HAS_TRANSPOSEWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeWx16 = TransposeWx16_Any_LSX;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_LSX;
}
}
#endif
#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
// Work across the source in 16x16 tiles // Work across the source in 16x16 tiles
while (i >= 16) { while (i >= 16) {
TransposeWx16(src, src_stride, dst, dst_stride, width); TransposeWx16(src, src_stride, dst, dst_stride, width);

View File

@ -1503,15 +1503,15 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
// Disable all optimizations. // Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_); MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) { for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_width_, benchmark_height_, 16); benchmark_height_, 16);
} }
// Enable optimizations. // Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_); MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) { for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_width_, benchmark_height_, 16); benchmark_height_, 16);
} }
for (i = 0; i < y_plane_size; ++i) { for (i = 0; i < y_plane_size; ++i) {