mirror of https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ubsan compliant '_any' functions using ptrdiff_t for pointer math
Bug: 416842099
Change-Id: I1e3c7bc1b363c11baeb3b529ee78e5ac8878c359
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6634217
Reviewed-by: Wan-Teh Chang <wtc@google.com>
parent cd0ae0a222
commit 4ac0a3ae3d
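The diff below rewrites the tail-handling memcpy pointer math in the '_any' row wrappers to go through a ptrdiff_t index. As a minimal standalone sketch of the failure mode (hypothetical helper names, not libyuv code): when an int offset expression overflows before the pointer addition, UBSan reports undefined behavior; widening the index to ptrdiff_t first, as the patch does with `np`, keeps the offset arithmetic in a 64-bit type on the LP64/LLP64 targets the new test covers.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

enum { BPP = 4 }; /* bytes per pixel, e.g. ARGB */

/* Tail copy the way the old macros wrote it: n * BPP is evaluated in int,
   so for n near INT_MAX / BPP the multiply overflows, which is undefined
   behavior and is what UBSan flags. */
static void copy_tail_int(uint8_t* dst, const uint8_t* vout, int n, int r) {
  memcpy(dst + n * BPP, vout, (size_t)r * BPP);
}

/* Tail copy as rewritten: widen once to ptrdiff_t (64-bit on the platforms
   above), so the offset no longer overflows before the pointer addition. */
static void copy_tail_ptrdiff(uint8_t* dst, const uint8_t* vout, int n, int r) {
  ptrdiff_t np = n;
  memcpy(dst + np * BPP, vout, (size_t)r * BPP);
}

The TestI400LargeSize test added at the bottom drives this with kWidth = 1073741823, where an intermediate product such as kWidth * 4 already exceeds INT_MAX.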
README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: https://chromium.googlesource.com/libyuv/libyuv/
-Version: 1910
+Version: 1911
 License: BSD-3-Clause
 License File: LICENSE
 Shipped: yes

include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1910
+#define LIBYUV_VERSION 1911
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_

source/row_any.cc
@@ -10,6 +10,7 @@
 
 #include "libyuv/row.h"
 
+#include <stddef.h>
 #include <string.h>  // For memset.
 
 #include "libyuv/basic_types.h"

@@ -43,12 +44,13 @@ extern "C" {
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \
     } \
-    memcpy(vin, y_buf + n, r); \
-    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 192, a_buf + n, r); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r); \
+    memcpy(vin + 64, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 128, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 192, a_buf + np, r); \
     ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_MERGEARGBROW_SSE2

@@ -76,17 +78,18 @@ ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n, r); \
-    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 192, a_buf + n, r); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r); \
+    memcpy(vin + 64, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 128, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 192, a_buf + np, r); \
     if (width & 1) { \
       vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1]; \
       vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
     } \
     ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants, \
              MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_I444ALPHATOARGBROW_SSSE3

@@ -134,12 +137,13 @@ ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n, r * SBPP); \
-    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
-    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
-    memcpy(vin + 48, a_buf + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r * SBPP); \
+    memcpy(vin + 16, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+    memcpy(vin + 32, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+    memcpy(vin + 48, a_buf + np, r * SBPP); \
     ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_I210ALPHATOARGBROW_NEON

@@ -221,12 +225,13 @@ ANY41CT(I410AlphaToARGBRow_Any_AVX2,
     if (n > 0) { \
       ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \
     } \
-    memcpy(vin, r_buf + n, r * SBPP); \
-    memcpy(vin + 16, g_buf + n, r * SBPP); \
-    memcpy(vin + 32, b_buf + n, r * SBPP); \
-    memcpy(vin + 48, a_buf + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, r_buf + np, r * SBPP); \
+    memcpy(vin + 16, g_buf + np, r * SBPP); \
+    memcpy(vin + 32, b_buf + np, r * SBPP); \
+    memcpy(vin + 48, a_buf + np, r * SBPP); \
     ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1); \
-    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy((uint8_t*)dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_MERGEAR64ROW_AVX2

@@ -260,22 +265,23 @@ ANY41PT(MergeARGB16To8Row_Any_NEON,
 #undef ANY41PT
 
 // Any 3 planes to 1.
 #define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
   void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
                const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
     SIMD_ALIGNED(uint8_t vin[64 * 3]); \
     SIMD_ALIGNED(uint8_t vout[64]); \
     memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
     } \
-    memcpy(vin, y_buf + n, r); \
-    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r); \
+    memcpy(vin + 64, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 128, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
     ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 // Merge functions.

@@ -337,27 +343,28 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
 // Note that odd width replication includes 444 due to implementation
 // on arm that subsamples 444 to 422 internally.
 // Any 3 planes to 1 with yuvconstants
 #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
   void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
                const uint8_t* v_buf, uint8_t* dst_ptr, \
                const struct YuvConstants* yuvconstants, int width) { \
     SIMD_ALIGNED(uint8_t vin[128 * 3]); \
     SIMD_ALIGNED(uint8_t vout[128]); \
     memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n, r); \
-    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
-    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r); \
+    memcpy(vin + 128, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
+    memcpy(vin + 256, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT)); \
     if (width & 1) { \
       vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
       vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1]; \
     } \
     ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_I422TOARGBROW_SSSE3

@@ -464,23 +471,24 @@ ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
 
 // Any 3 planes of 16 bit to 1 with yuvconstants
 // TODO(fbarchard): consider sharing this code with ANY31C
 #define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
   void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
                uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
                int width) { \
     SIMD_ALIGNED(T vin[16 * 3]); \
     SIMD_ALIGNED(uint8_t vout[64]); \
     memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n, r * SBPP); \
-    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
-    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r * SBPP); \
+    memcpy(vin + 16, u_buf + (np >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
+    memcpy(vin + 32, v_buf + (np >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
     ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_I210TOAR30ROW_SSSE3

@@ -551,11 +559,12 @@ ANY31CT(I212ToAR30Row_Any_NEON, I212ToAR30Row_NEON, 1, 0, uint16_t, 2, 4, 7)
     if (n > 0) { \
       ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \
     } \
-    memcpy(vin, r_buf + n, r * SBPP); \
-    memcpy(vin + 16, g_buf + n, r * SBPP); \
-    memcpy(vin + 32, b_buf + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, r_buf + np, r * SBPP); \
+    memcpy(vin + 16, g_buf + np, r * SBPP); \
+    memcpy(vin + 32, b_buf + np, r * SBPP); \
     ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1); \
-    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy((uint8_t*)dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_MERGEXR30ROW_AVX2

@@ -615,11 +624,12 @@ ANY31PT(MergeXRGB16To8Row_Any_NEON,
     if (n > 0) { \
       ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
     } \
-    memcpy(vin, y_buf + n * SBPP, r * SBPP); \
-    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np * SBPP, r * SBPP); \
+    memcpy(vin + 128, uv_buf + (np >> UVSHIFT) * SBPP2, \
            SS(r, UVSHIFT) * SBPP2); \
     ANY_SIMD(vin, vin + 128, vout, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 // Merge functions.

@@ -757,10 +767,11 @@ ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
     if (n > 0) { \
       ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2); \
     } \
-    memcpy(vin, src_yuy2 + n * SBPP, r * SBPP); \
-    memcpy(vin + 32, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_yuy2 + np * SBPP, r * SBPP); \
+    memcpy(vin + 32, src_yuy2 + stride_yuy2 + np * SBPP, r * SBPP); \
     ANY_SIMD(vin, 32, vout, MASK + 1); \
-    memcpy(dst_uv + n * BPP, vout, r * BPP); \
+    memcpy(dst_uv + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_YUY2TONVUVROW_NEON

@@ -785,11 +796,12 @@ ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
     if (n > 0) { \
       ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n * SBPP, r * SBPP); \
-    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np * SBPP, r * SBPP); \
+    memcpy(vin + 128, uv_buf + (np >> UVSHIFT) * SBPP2, \
            SS(r, UVSHIFT) * SBPP2); \
     ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 // Biplanar to RGB.

@@ -868,21 +880,22 @@ ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
 #undef ANY21C
 
 // Any 2 planes of 16 bit to 1 with yuvconstants
 #define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
   void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
                const struct YuvConstants* yuvconstants, int width) { \
     SIMD_ALIGNED(T vin[16 * 2]); \
     SIMD_ALIGNED(uint8_t vout[64]); \
     memset(vin, 0, sizeof(vin)); /* for msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, y_buf + n, r * SBPP); \
-    memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
+    ptrdiff_t np = n; \
+    memcpy(vin, y_buf + np, r * SBPP); \
+    memcpy(vin + 16, uv_buf + 2 * (np >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
     ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
+    memcpy(dst_ptr + (np >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
   }
 
 #ifdef HAS_P210TOAR30ROW_SSSE3

@@ -936,10 +949,11 @@ ANY21CT(P410ToARGBRow_Any_NEON, P410ToARGBRow_NEON, 0, 0, uint16_t, 2, 4, 7)
     if (n > 0) { \
       ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
     } \
-    memcpy(vin, src_u + n, r * BPP); \
-    memcpy(vin + 16, src_v + n, r * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_u + np, r * BPP); \
+    memcpy(vin + 16, src_v + np, r * BPP); \
     ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1); \
-    memcpy(dst_uv + n * 2, vout, r * BPP * 2); \
+    memcpy(dst_uv + np * 2, vout, r * BPP * 2); \
   }
 
 #ifdef HAS_MERGEUVROW_16_AVX2

@@ -952,19 +966,20 @@ ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
 #undef ANY21CT
 
 // Any 1 to 1.
 #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
   void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
     SIMD_ALIGNED(uint8_t vin[128]); \
     SIMD_ALIGNED(uint8_t vout[128]); \
     memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
     ANY_SIMD(vin, vout, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_COPYROW_AVX512BW

@@ -1464,21 +1479,22 @@ ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
 #undef ANY11
 
 // Any 1 to 1 blended. Destination is read, modify, write.
 #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
   void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
     SIMD_ALIGNED(uint8_t vin[64]); \
     SIMD_ALIGNED(uint8_t vout[64]); \
     memset(vin, 0, sizeof(vin)); /* for msan */ \
     memset(vout, 0, sizeof(vout)); /* for msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
-    memcpy(vout, dst_ptr + n * BPP, r * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+    memcpy(vout, dst_ptr + np * BPP, r * BPP); \
     ANY_SIMD(vin, vout, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_ARGBCOPYALPHAROW_AVX2

@@ -1506,9 +1522,10 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, param, n); \
     } \
-    memcpy(vin, src_ptr + n * SBPP, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np * SBPP, r * SBPP); \
     ANY_SIMD(vin, vout, param, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #if defined(HAS_I400TOARGBROW_SSE2)

@@ -1632,9 +1649,10 @@ ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, n); \
     } \
-    memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, (uint8_t*)(src_ptr) + np * SBPP, r * SBPP); \
     ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1); \
-    memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP); \
+    memcpy((uint8_t*)(dst_ptr) + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_ARGBTOAR64ROW_SSSE3

@@ -1698,9 +1716,10 @@ ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, scale, n); \
     } \
-    memcpy(vin, src_ptr + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np, r * SBPP); \
     ANY_SIMD(vin, vout, scale, MASK + 1); \
-    memcpy(dst_ptr + n, vout, r * BPP); \
+    memcpy(dst_ptr + np, vout, r * BPP); \
   }
 
 #ifdef HAS_CONVERT16TO8ROW_SSSE3

@@ -1804,9 +1823,10 @@ ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, scale, bias, n); \
     } \
-    memcpy(vin, src_ptr + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np, r * SBPP); \
     ANY_SIMD(vin, vout, scale, bias, MASK + 1); \
-    memcpy(dst_ptr + n, vout, r * BPP); \
+    memcpy(dst_ptr + np, vout, r * BPP); \
   }
 
 #ifdef HAS_CONVERT8TO8ROW_NEON

@@ -1840,9 +1860,10 @@ ANY11SB(Convert8To8Row_Any_AVX2,
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, param, n); \
     } \
-    memcpy(vin, src_ptr + n, r * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np, r * SBPP); \
     ANY_SIMD(vin, vout, param, MASK + 1); \
-    memcpy(dst_ptr + n, vout, r * BPP); \
+    memcpy(dst_ptr + np, vout, r * BPP); \
   }
 
 #ifdef HAS_HALFFLOATROW_SSE2

@@ -1876,20 +1897,21 @@ ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31)
 #undef ANY11P16
 
 // Any 1 to 1 with yuvconstants
 #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
   void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
                const struct YuvConstants* yuvconstants, int width) { \
     SIMD_ALIGNED(uint8_t vin[128]); \
     SIMD_ALIGNED(uint8_t vout[128]); \
     memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
     ANY_SIMD(vin, vout, yuvconstants, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #if defined(HAS_YUY2TOARGBROW_SSSE3)

@@ -1926,13 +1948,14 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
     if (n > 0) { \
       ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \
     } \
-    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np * SBPP, r * SBPP * sizeof(TS)); \
     if (source_y_fraction) { \
-      memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
+      memcpy(vin + 64, src_ptr + src_stride + np * SBPP, \
              r * SBPP * sizeof(TS)); \
     } \
     ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP * sizeof(TD)); \
   }
 
 #ifdef HAS_INTERPOLATEROW_AVX2

@@ -1980,13 +2003,14 @@ ANY11I(InterpolateRow_16_Any_NEON,
     if (n > 0) { \
       ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \
     } \
-    memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np * SBPP, r * SBPP * sizeof(TS)); \
     if (source_y_fraction) { \
-      memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \
+      memcpy(vin + 64, src_ptr + src_stride + np * SBPP, \
             r * SBPP * sizeof(TS)); \
     } \
     ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP * sizeof(TD)); \
   }
 
 #ifdef HAS_INTERPOLATEROW_16TO8_NEON

@@ -2021,9 +2045,10 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
     if (n > 0) { \
       ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
     } \
-    memcpy(vin, src_ptr, r* BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr, r * BPP); \
     ANY_SIMD(vin, vout, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
   }
 
 #ifdef HAS_MIRRORROW_AVX2

@@ -2097,8 +2122,9 @@ ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
     if (n > 0) { \
       ANY_SIMD(dst_ptr, v32, n); \
     } \
+    ptrdiff_t np = n; \
     ANY_SIMD(vout, v32, MASK + 1); \
-    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
+    memcpy(dst_ptr + np * BPP, vout, r * BPP); \
   }
 
 #ifdef HAS_SETROW_X86

@@ -2122,21 +2148,22 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
 #undef ANY1
 
 // Any 1 to 2. Outputs UV planes.
 #define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
   void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
                int width) { \
     SIMD_ALIGNED(uint8_t vin[128]); \
     SIMD_ALIGNED(uint8_t vout[128 * 2]); \
     memset(vin, 0, sizeof(vin)); /* for msan */ \
     int r = width & MASK; \
     int n = width & ~MASK; \
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_u, dst_v, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
     ANY_SIMD(vin, vout, vout + 128, MASK + 1); \
-    memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \
-    memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \
+    memcpy(dst_u + (np >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \
+    memcpy(dst_v + (np >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \
   }
 
 #ifdef HAS_SPLITUVROW_SSE2

@@ -2212,10 +2239,11 @@ ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31)
     if (n > 0) { \
       ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \
     } \
-    memcpy(vin, src_uv + n * 2, r * BPP * 2); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_uv + np * 2, r * BPP * 2); \
     ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1); \
-    memcpy(dst_u + n, vout, r * BPP); \
-    memcpy(dst_v + n, vout + 16, r * BPP); \
+    memcpy(dst_u + np, vout, r * BPP); \
+    memcpy(dst_v + np, vout + 16, r * BPP); \
   }
 
 #ifdef HAS_SPLITUVROW_16_AVX2

@@ -2240,11 +2268,12 @@ ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
     } \
-    memcpy(vin, src_ptr + n * BPP, r * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np * BPP, r * BPP); \
     ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1); \
-    memcpy(dst_r + n, vout, r); \
-    memcpy(dst_g + n, vout + 16, r); \
-    memcpy(dst_b + n, vout + 32, r); \
+    memcpy(dst_r + np, vout, r); \
+    memcpy(dst_g + np, vout + 16, r); \
+    memcpy(dst_b + np, vout + 32, r); \
   }
 
 #ifdef HAS_SPLITRGBROW_SSSE3

@@ -2284,12 +2313,13 @@ ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15)
     if (n > 0) { \
       ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \
     } \
-    memcpy(vin, src_ptr + n * BPP, r * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + np * BPP, r * BPP); \
     ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1); \
-    memcpy(dst_r + n, vout, r); \
-    memcpy(dst_g + n, vout + 16, r); \
-    memcpy(dst_b + n, vout + 32, r); \
-    memcpy(dst_a + n, vout + 48, r); \
+    memcpy(dst_r + np, vout, r); \
+    memcpy(dst_g + np, vout + 16, r); \
+    memcpy(dst_b + np, vout + 32, r); \
+    memcpy(dst_a + np, vout + 48, r); \
   }
 
 #ifdef HAS_SPLITARGBROW_SSE2

@@ -2318,8 +2348,9 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
     if (n > 0) { \
       ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
-    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+    memcpy(vin + 128, src_ptr + src_stride + (np >> UVSHIFT) * BPP, \
            SS(r, UVSHIFT) * BPP); \
     if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
       memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \

@@ -2328,8 +2359,8 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
              vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
     } \
     ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \
-    memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \
-    memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \
+    memcpy(dst_u + (np >> 1), vout, SS(r, 1)); \
+    memcpy(dst_v + (np >> 1), vout + 128, SS(r, 1)); \
   }
 
 #ifdef HAS_ARGBTOUVROW_AVX2

@@ -2550,8 +2581,9 @@ ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
     if (n > 0) { \
       ANY_SIMD(src_ptr, src_stride, dst_vu, n); \
     } \
-    memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
-    memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_ptr + (np >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+    memcpy(vin + 128, src_ptr + src_stride + (np >> UVSHIFT) * BPP, \
            SS(r, UVSHIFT) * BPP); \
     if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
       memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \

@@ -2560,7 +2592,7 @@ ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31)
              vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
     } \
     ANY_SIMD(vin, 128, vout, MASK + 1); \
-    memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2); \
+    memcpy(dst_vu + (np >> 1) * 2, vout, SS(r, 1) * 2); \
   }
 
 #ifdef HAS_AYUVTOVUROW_NEON

@@ -2585,9 +2617,10 @@ ANY11S(AYUVToVURow_Any_SVE2, AYUVToVURow_SVE2, 0, 4, 1)
     if (n > 0) { \
       ANY_SIMD(src, src_tile_stride, dst, n); \
     } \
-    memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src + (np / 16) * src_tile_stride, r * BPP); \
     ANY_SIMD(vin, src_tile_stride, vout, MASK + 1); \
-    memcpy(dst + n, vout, r * BPP); \
+    memcpy(dst + np, vout, r * BPP); \
   }
 
 #ifdef HAS_DETILEROW_NEON

@@ -2618,10 +2651,11 @@ ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15)
     if (n > 0) { \
       ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n); \
     } \
-    memcpy(vin, src_uv + (n / 16) * src_tile_stride, r); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_uv + (np / 16) * src_tile_stride, r); \
     ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r); \
-    memcpy(dst_u + n / 2, vout, (r + 1) / 2); \
-    memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2); \
+    memcpy(dst_u + np / 2, vout, (r + 1) / 2); \
+    memcpy(dst_v + np / 2, vout + 8, (r + 1) / 2); \
   }
 
 #ifdef HAS_DETILESPLITUVROW_NEON

@@ -2644,10 +2678,11 @@ ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
       ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \
                n); \
     } \
-    memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r); \
-    memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \
+    ptrdiff_t np = n; \
+    memcpy(vin, src_y + (np / 16) * src_y_tile_stride, r); \
+    memcpy(vin + 16, src_uv + (np / 16) * src_uv_tile_stride, r); \
     ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r); \
-    memcpy(dst_yuy2 + 2 * n, vout, 2 * r); \
+    memcpy(dst_yuy2 + 2 * np, vout, 2 * r); \
   }
 
 #ifdef HAS_DETILETOYUY2_NEON

unit_test/convert_argb_test.cc
@@ -2720,6 +2720,32 @@ TEST_F(LibYUVConvertTest, TestUYVYToARGB) {
   uint32_t checksum = HashDjb2(&dest_argb[0][0], sizeof(dest_argb), 5381);
   EXPECT_EQ(3486643515u, checksum);
 }
 
+#if defined(_M_X64) || defined(_M_X64) || defined(__aarch64__)
+TEST_F(LibYUVConvertTest, TestI400LargeSize) {
+  // The width and height are chosen as follows:
+  // - kWidth * kHeight is not a multiple of 8: This lets us use the Any
+  //   variant of the conversion function.
+  const int kWidth = 1073741823;
+  const int kHeight = 2;
+  // Allocate one extra column so that the coalesce optimizations do not trigger
+  // in convert_argb.cc (they are triggered only when stride is equal to width).
+  const size_t kStride = kWidth + 1;
+  align_buffer_page_end(orig_i400, (size_t)kWidth * kHeight);
+  ASSERT_NE(orig_i400, nullptr);
+  align_buffer_page_end(dest_argb, (size_t)kWidth * kHeight * 4);
+  ASSERT_NE(dest_argb, nullptr);
+  for (int i = 0; i < kWidth * kHeight; ++i) {
+    orig_i400[i] = i % 256;
+  }
+  EXPECT_EQ(I400ToARGBMatrix(orig_i400, kStride, dest_argb, kWidth,
+                             &kYuvJPEGConstants, kWidth, kHeight),
+            0);
+  free_aligned_buffer_page_end(dest_argb);
+  free_aligned_buffer_page_end(orig_i400);
+}
+#endif  // defined(_M_X64) || defined(_M_X64) || defined(__aarch64__)
+
 #endif  // !defined(LEAN_TESTS)
 
 }  // namespace libyuv
|||||||
Loading…
x
Reference in New Issue
Block a user