libyuv/source/rotate_neon.cc
Wan-Teh Chang ebe6fef903 Fix integer overflow in multiplications of stride
Audit all occurrences of "stride *" in the libyuv source tree. Ensure
that these multiplications are performed in the ptrdiff_t type.

For functions not declared in a public header (such as static
functions), prefer to declare the stride parameters (typically named
src_stride and dst_stride) and related stride local variables as
ptrdiff_t. If this is not possible, add ptrdiff_t casts to the stride
parameters in multiplications. If intptr_t or int64_t casts were used,
change them to ptrdiff_t casts.

Bug: chromium:516986556
Change-Id: I6cd8a8eb00cbb5380db828bf83e4d89ff95891f3
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/7882967
Commit-Queue: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Frank Barchard <fbarchard@google.com>
2026-05-28 14:12:37 -07:00

220 lines
9.1 KiB
C++

/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate_row.h"
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
!defined(__aarch64__)
void TransposeWx8_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width) {
const uint8_t* temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %[width], #8 \n"
"1: \n"
"mov %[temp], %[src] \n"
"vld1.8 {d0}, [%[temp]], %[src_stride] \n"
"vld1.8 {d1}, [%[temp]], %[src_stride] \n"
"vld1.8 {d2}, [%[temp]], %[src_stride] \n"
"vld1.8 {d3}, [%[temp]], %[src_stride] \n"
"vld1.8 {d4}, [%[temp]], %[src_stride] \n"
"vld1.8 {d5}, [%[temp]], %[src_stride] \n"
"vld1.8 {d6}, [%[temp]], %[src_stride] \n"
"vld1.8 {d7}, [%[temp]] \n"
"add %[src], #8 \n"
"vtrn.8 d1, d0 \n"
"vtrn.8 d3, d2 \n"
"vtrn.8 d5, d4 \n"
"vtrn.8 d7, d6 \n"
"subs %[width], #8 \n"
"vtrn.16 d1, d3 \n"
"vtrn.16 d0, d2 \n"
"vtrn.16 d5, d7 \n"
"vtrn.16 d4, d6 \n"
"vtrn.32 d1, d5 \n"
"vtrn.32 d0, d4 \n"
"vtrn.32 d3, d7 \n"
"vtrn.32 d2, d6 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"mov %[temp], %[dst] \n"
"vst1.8 {d1}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d0}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d3}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d2}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d5}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d4}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d7}, [%[temp]], %[dst_stride] \n"
"vst1.8 {d6}, [%[temp]] \n"
"add %[dst], %[dst], %[dst_stride], lsl #3 \n"
"bge 1b \n"
: [temp] "=&r"(temp), // %[temp]
[src] "+r"(src), // %[src]
[dst] "+r"(dst), // %[dst]
[width] "+r"(width) // %[width]
: [src_stride] "r"(src_stride), // %[src_stride]
[dst_stride] "r"(dst_stride) // %[dst_stride]
: "memory", "cc", "q0", "q1", "q2", "q3");
}
void TransposeUVWx8_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width) {
const uint8_t* temp;
asm volatile(
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %[width], #8 \n"
"1: \n"
"mov %[temp], %[src] \n"
"vld2.8 {d0, d1}, [%[temp]], %[src_stride] \n"
"vld2.8 {d2, d3}, [%[temp]], %[src_stride] \n"
"vld2.8 {d4, d5}, [%[temp]], %[src_stride] \n"
"vld2.8 {d6, d7}, [%[temp]], %[src_stride] \n"
"vld2.8 {d16, d17}, [%[temp]], %[src_stride] \n"
"vld2.8 {d18, d19}, [%[temp]], %[src_stride] \n"
"vld2.8 {d20, d21}, [%[temp]], %[src_stride] \n"
"vld2.8 {d22, d23}, [%[temp]] \n"
"add %[src], #8*2 \n"
"vtrn.8 q1, q0 \n"
"vtrn.8 q3, q2 \n"
"vtrn.8 q9, q8 \n"
"vtrn.8 q11, q10 \n"
"subs %[width], #8 \n"
"vtrn.16 q1, q3 \n"
"vtrn.16 q0, q2 \n"
"vtrn.16 q9, q11 \n"
"vtrn.16 q8, q10 \n"
"vtrn.32 q1, q9 \n"
"vtrn.32 q0, q8 \n"
"vtrn.32 q3, q11 \n"
"vtrn.32 q2, q10 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"vrev16.8 q8, q8 \n"
"vrev16.8 q9, q9 \n"
"vrev16.8 q10, q10 \n"
"vrev16.8 q11, q11 \n"
"mov %[temp], %[dst_a] \n"
"vst1.8 {d2}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d0}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d6}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d4}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d18}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d16}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d22}, [%[temp]], %[dst_stride_a] \n"
"vst1.8 {d20}, [%[temp]] \n"
"add %[dst_a], %[dst_a], %[dst_stride_a], lsl #3 \n"
"mov %[temp], %[dst_b] \n"
"vst1.8 {d3}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d1}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d7}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d5}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d19}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d17}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d23}, [%[temp]], %[dst_stride_b] \n"
"vst1.8 {d21}, [%[temp]] \n"
"add %[dst_b], %[dst_b], %[dst_stride_b], lsl #3 \n"
"bge 1b \n"
: [temp] "=&r"(temp), // %[temp]
[src] "+r"(src), // %[src]
[dst_a] "+r"(dst_a), // %[dst_a]
[dst_b] "+r"(dst_b), // %[dst_b]
[width] "+r"(width) // %[width]
: [src_stride] "r"(src_stride), // %[src_stride]
[dst_stride_a] "r"(dst_stride_a), // %[dst_stride_a]
[dst_stride_b] "r"(dst_stride_b) // %[dst_stride_b]
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
// Transpose 32 bit values (ARGB)
void Transpose4x4_32_NEON(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width) {
const uint8_t* src1 = src + src_stride;
const uint8_t* src2 = src1 + src_stride;
const uint8_t* src3 = src2 + src_stride;
uint8_t* dst1 = dst + dst_stride;
uint8_t* dst2 = dst1 + dst_stride;
uint8_t* dst3 = dst2 + dst_stride;
asm volatile(
// Main loop transpose 4x4. Read a column, write a row.
"1: \n"
"vld4.32 {d0[0], d2[0], d4[0], d6[0]}, [%0], %9 \n"
"vld4.32 {d0[1], d2[1], d4[1], d6[1]}, [%1], %9 \n"
"vld4.32 {d1[0], d3[0], d5[0], d7[0]}, [%2], %9 \n"
"vld4.32 {d1[1], d3[1], d5[1], d7[1]}, [%3], %9 \n"
"subs %8, %8, #4 \n" // w -= 4
"vst1.8 {q0}, [%4]! \n"
"vst1.8 {q1}, [%5]! \n"
"vst1.8 {q2}, [%6]! \n"
"vst1.8 {q3}, [%7]! \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(dst1), // %5
"+r"(dst2), // %6
"+r"(dst3), // %7
"+r"(width) // %8
: "r"((ptrdiff_t)src_stride * 4) // %9
: "memory", "cc", "q0", "q1", "q2", "q3");
}
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif