Fix RotatePlane by 90 degrees on NEON when the source width is not a multiple of 8

Bug: b/220888716, b/218875554, b/220205245
Change-Id: I17e118ac9b9a7013386a5f0ad27a2dd249474ae5
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3483576
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2022-02-23 00:46:55 -08:00 committed by libyuv LUCI CQ
parent 3b8c86d23a
commit e77531f6f1
9 changed files with 128 additions and 136 deletions

View File

@ -1864,7 +1864,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
int width,
int height);
// Convert I422 to RGBA with matrix.
// Convert I420 to RGBA with matrix.
LIBYUV_API
int I420ToRGBAMatrix(const uint8_t* src_y,
int src_stride_y,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1810
#define LIBYUV_VERSION 1811
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -29,10 +29,7 @@ void TransposePlane(const uint8_t* src,
int width,
int height) {
int i = height;
#if defined(HAS_TRANSPOSEWX16_MSA)
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C;
#elif defined(HAS_TRANSPOSEWX16_LSX)
#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
int dst_stride, int width) = TransposeWx16_C;
#else
@ -40,25 +37,13 @@ void TransposePlane(const uint8_t* src,
int dst_stride, int width) = TransposeWx8_C;
#endif
#if defined(HAS_TRANSPOSEWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_MSA;
}
}
#elif defined(HAS_TRANSPOSEWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeWx16 = TransposeWx16_Any_LSX;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_LSX;
}
}
#else
#if defined(HAS_TRANSPOSEWX8_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
TransposeWx8 = TransposeWx8_Any_NEON;
if (IS_ALIGNED(width, 8)) {
TransposeWx8 = TransposeWx8_NEON;
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
@ -76,17 +61,24 @@ void TransposePlane(const uint8_t* src,
}
}
#endif
#endif /* defined(HAS_TRANSPOSEWX16_MSA) */
#if defined(HAS_TRANSPOSEWX16_MSA)
// Work across the source in 16x16 tiles
while (i >= 16) {
TransposeWx16(src, src_stride, dst, dst_stride, width);
src += 16 * src_stride; // Go down 16 rows.
dst += 16; // Move over 16 columns.
i -= 16;
if (TestCpuFlag(kCpuHasMSA)) {
TransposeWx16 = TransposeWx16_Any_MSA;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_MSA;
}
#elif defined(HAS_TRANSPOSEWX16_LSX)
}
#endif
#if defined(HAS_TRANSPOSEWX16_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
TransposeWx16 = TransposeWx16_Any_LSX;
if (IS_ALIGNED(width, 16)) {
TransposeWx16 = TransposeWx16_LSX;
}
}
#endif
#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
// Work across the source in 16x16 tiles
while (i >= 16) {
TransposeWx16(src, src_stride, dst, dst_stride, width);

View File

@ -1503,15 +1503,15 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(orig_y, orig_width, dst_c, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_,
benchmark_width_, benchmark_height_, 16);
DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
benchmark_height_, 16);
}
for (i = 0; i < y_plane_size; ++i) {