Mirror of https://chromium.googlesource.com/libyuv/libyuv
Synced 2025-12-08 01:36:47 +08:00
add YUV24 and AYUV formats
Alternatives to RGB24 and ARGB for working with the GPU.

BUG=libyuv:832
TESTED=out/Release/libyuv_unittest --gtest_filter=*NV21To???24* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
R=rrwinterton@gmail.com

Change-Id: I5559c63f4bd4c847492fcb1571f7b03c58146689
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1501735
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent 7ce50764c5
commit 5b6042fa0d
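A minimal usage sketch of the two new public entry points follows (not part of the change). Buffer sizes and strides are the minimal assumptions for contiguous planes, not libyuv requirements; the header split follows the context functions visible in the hunks below (AYUVToNV21 next to UYVYToI420, NV21ToYUV24 next to NV21ToRGB24).

/* Sketch: packed AYUV frame -> NV21 -> packed YUV24, assuming minimal
 * strides and caller-provided yuv24 of width * 3 * height bytes. */
#include <stdint.h>
#include <stdlib.h>
#include "libyuv/convert.h"      /* AYUVToNV21, assumed from hunk context */
#include "libyuv/convert_argb.h" /* NV21ToYUV24, assumed from hunk context */

int ConvertAyuvFrameToYuv24(const uint8_t* ayuv, int width, int height,
                            uint8_t* yuv24) {
  int half_w = (width + 1) / 2;
  int half_h = (height + 1) / 2;
  uint8_t* y = (uint8_t*)malloc((size_t)width * height);       /* Y plane */
  uint8_t* vu = (uint8_t*)malloc((size_t)half_w * 2 * half_h); /* VU plane */
  int r = -1;
  if (y && vu) {
    r = AYUVToNV21(ayuv, width * 4, /* 4 bytes per packed AYUV pixel */
                   y, width,        /* minimal Y stride */
                   vu, half_w * 2,  /* interleaved VU stride */
                   width, height);
    if (r == 0) {
      r = NV21ToYUV24(y, width, vu, half_w * 2, yuv24, width * 3, width,
                      height);
    }
  }
  free(y);
  free(vu);
  return r;
}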
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1724
+Version: 1725
 License: BSD
 License File: LICENSE
 
@@ -226,6 +226,17 @@ int UYVYToI420(const uint8_t* src_uyvy,
                int width,
                int height);
 
+// Convert AYUV to NV21.
+LIBYUV_API
+int AYUVToNV21(const uint8_t* src_ayuv,
+               int src_stride_ayuv,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_vu,
+               int dst_stride_vu,
+               int width,
+               int height);
+
 // Convert M420 to I420.
 LIBYUV_API
 int M420ToI420(const uint8_t* src_m420,
@@ -375,13 +386,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
                    int height);
 
 #ifdef HAVE_JPEG
-// src_mjpg is pointer to raw jpeg bytes in memory
-// src_size_mjpg is size of jpeg in bytes
 // src_width/height provided by capture.
 // dst_width/height for clipping determine final size.
 LIBYUV_API
-int MJPGToI420(const uint8_t* src_mjpg,
-               size_t src_size_mjpg,
+int MJPGToI420(const uint8_t* sample,
+               size_t sample_size,
                uint8_t* dst_y,
                int dst_stride_y,
                uint8_t* dst_u,
@@ -395,8 +404,8 @@ int MJPGToI420(const uint8_t* src_mjpg,
 
 // JPEG to NV21
 LIBYUV_API
-int MJPGToNV21(const uint8_t* src_mjpg,
-               size_t src_size_mjpg,
+int MJPGToNV21(const uint8_t* sample,
+               size_t sample_size,
                uint8_t* dst_y,
                int dst_stride_y,
                uint8_t* dst_vu,
@@ -408,8 +417,8 @@ int MJPGToNV21(const uint8_t* src_mjpg,
 
 // Query size of MJPG in pixels.
 LIBYUV_API
-int MJPGSize(const uint8_t* src_mjpg,
-             size_t src_size_mjpg,
+int MJPGSize(const uint8_t* sample,
+             size_t sample_size,
             int* width,
             int* height);
 #endif
@@ -298,6 +298,17 @@ int NV21ToRGB24(const uint8_t* src_y,
                 int width,
                 int height);
 
+// Convert NV21 to YUV24.
+LIBYUV_API
+int NV21ToYUV24(const uint8_t* src_y,
+                int src_stride_y,
+                const uint8_t* src_vu,
+                int src_stride_vu,
+                uint8_t* dst_yuv24,
+                int dst_stride_yuv24,
+                int width,
+                int height);
+
 // Convert NV12 to RAW.
 LIBYUV_API
 int NV12ToRAW(const uint8_t* src_y,
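For orientation, both new layouts can be read off the C row functions later in this diff: NV21ToYUV24Row_C writes V, U, Y per pixel, and AYUVToYRow_C's comment gives "v,u,y,a". So YUV24 is packed 3 bytes per pixel and AYUV packed 4 bytes per pixel, low address first. A sketch of the per-pixel offsets, with illustrative names:

/* Per-pixel byte offsets implied by the row functions in this change. */
enum { kYUV24_V = 0, kYUV24_U = 1, kYUV24_Y = 2 };           /* 3 bpp */
enum { kAYUV_V = 0, kAYUV_U = 1, kAYUV_Y = 2, kAYUV_A = 3 }; /* 4 bpp */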
@@ -627,8 +638,8 @@ int AR30ToAB30(const uint8_t* src_ar30,
 // src_width/height provided by capture
 // dst_width/height for clipping determine final size.
 LIBYUV_API
-int MJPGToARGB(const uint8_t* src_mjpg,
-               size_t src_size_mjpg,
+int MJPGToARGB(const uint8_t* sample,
+               size_t sample_size,
                uint8_t* dst_argb,
                int dst_stride_argb,
                int src_width,
@@ -26,7 +26,7 @@ namespace libyuv {
 extern "C" {
 #endif
 
-LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg_size);
+LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size);
 
 #ifdef __cplusplus
 }  // extern "C"
@@ -295,6 +295,8 @@ extern "C" {
 #define HAS_I422TOYUY2ROW_AVX2
 #define HAS_MERGEUVROW_16_AVX2
 #define HAS_MULTIPLYROW_16_AVX2
+// TODO(fbarchard): Fix AVX2 version of YUV24
+// #define HAS_NV21TOYUV24ROW_AVX2
 #endif
 
 // The following are available for AVX512 clang x86 platforms:
@@ -330,6 +332,8 @@ extern "C" {
 #define HAS_ARGBTOUVROW_NEON
 #define HAS_ARGBTOYJROW_NEON
 #define HAS_ARGBTOYROW_NEON
+#define HAS_AYUVTOVUROW_NEON
+#define HAS_AYUVTOYROW_NEON
 #define HAS_BGRATOUVROW_NEON
 #define HAS_BGRATOYROW_NEON
 #define HAS_BYTETOFLOATROW_NEON
@@ -355,6 +359,7 @@ extern "C" {
 #define HAS_NV12TORGB565ROW_NEON
 #define HAS_NV21TOARGBROW_NEON
 #define HAS_NV21TORGB24ROW_NEON
+#define HAS_NV21TOYUV24ROW_NEON
 #define HAS_RAWTOARGBROW_NEON
 #define HAS_RAWTORGB24ROW_NEON
 #define HAS_RAWTOUVROW_NEON
@@ -402,6 +407,7 @@ extern "C" {
 
 // The following are available on AArch64 platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_FLOATDIVTOBYTEROW_NEON
 #define HAS_SCALESUMSAMPLES_NEON
 #endif
 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
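These HAS_* macros follow libyuv's two-level dispatch: the define gates whether a SIMD row function is even compiled for the target, and conversion functions still probe the CPU at runtime before swapping it in (exactly the pattern the AYUVToNV21 implementation uses below). A self-contained sketch of the idiom, with stand-in names rather than real libyuv symbols:

#include <stdint.h>

/* Illustrative stand-ins; real code links the actual SIMD variants. */
static void SomeRow_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x];
}
#define SomeRow_Any_NEON SomeRow_C /* stand-in for the any-width wrapper */
#define SomeRow_NEON SomeRow_C     /* stand-in for the full-speed kernel */
#define HAS_SOMEROW_NEON 1
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
enum { kCpuHasNEON = 1 };
static int TestCpuFlag(int flag) { (void)flag; return 1; } /* stand-in */

static void Dispatch(const uint8_t* src, uint8_t* dst, int width) {
  void (*RowFunc)(const uint8_t*, uint8_t*, int) = SomeRow_C;
#if defined(HAS_SOMEROW_NEON)     /* compile-time: kernel exists for target */
  if (TestCpuFlag(kCpuHasNEON)) { /* run-time: CPU actually supports it */
    RowFunc = SomeRow_Any_NEON;   /* any-width wrapper first */
    if (IS_ALIGNED(width, 16)) {
      RowFunc = SomeRow_NEON;     /* fastest path for multiple-of-16 widths */
    }
  }
#endif
  RowFunc(src, dst, width);
}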
@@ -815,6 +821,10 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width);
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width);
 void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
                         uint8_t* dst_argb,
                         const struct YuvConstants* yuvconstants,
@@ -2183,6 +2193,10 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width);
+void NV21ToYUV24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_vu,
+                      uint8_t* dst_yuv24,
+                      int width);
 void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
@@ -2349,6 +2363,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width);
+void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width);
 void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
@@ -2554,6 +2572,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
                              uint8_t* dst_ptr,
                              const struct YuvConstants* yuvconstants,
                              int width);
+void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
+                             const uint8_t* src_vu,
+                             uint8_t* dst_yuv24,
+                             int width);
 void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
                                const uint8_t* uv_buf,
                                uint8_t* dst_ptr,
@@ -3027,6 +3049,10 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
                              uint8_t* dst_ptr,
                              const struct YuvConstants* yuvconstants,
                              int width);
+void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
+                             const uint8_t* src_vu,
+                             uint8_t* dst_yuv24,
+                             int width);
 void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
                               const uint8_t* uv_buf,
                               uint8_t* dst_ptr,
@@ -3345,6 +3371,19 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
                             uint8_t* dst_v,
                             int width);
 
+void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToVURow_C(const uint8_t* src_ayuv, int stride_ayuv,
+                   uint8_t* dst_vu,
+                   int width);
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToVURow_NEON(const uint8_t* src_ayuv, int stride_ayuv,
+                      uint8_t* dst_vu,
+                      int width);
+void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv, int stride_ayuv,
+                          uint8_t* dst_vu,
+                          int width);
+
 void I422ToYUY2Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
@@ -3960,6 +3999,18 @@ float ScaleSumSamples_NEON(const float* src,
 void ScaleSamples_C(const float* src, float* dst, float scale, int width);
 void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
 
+void FloatDivToByteRow_C(const float* src_weights,
+                         const float* src_values,
+                         uint8_t* dst_out,
+                         uint8_t* dst_mask,
+                         int width);
+void FloatDivToByteRow_NEON(const float* src_weights,
+                            const float* src_values,
+                            uint8_t* dst_out,
+                            uint8_t* dst_mask,
+                            int width);
+
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1724
+#define LIBYUV_VERSION 1725
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -880,6 +880,76 @@ int UYVYToI420(const uint8_t* src_uyvy,
   return 0;
 }
 
+// Convert AYUV to NV21.
+LIBYUV_API
+int AYUVToNV21(const uint8_t* src_ayuv,
+               int src_stride_ayuv,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_vu,
+               int dst_stride_vu,
+               int width,
+               int height) {
+  int y;
+  void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv,
+                      uint8_t* dst_vu, int width) =
+      AYUVToVURow_C;
+  void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
+      AYUVToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
+    src_stride_ayuv = -src_stride_ayuv;
+  }
+// place holders for future intel code
+#if defined(HAS_AYUVTOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    AYUVToVURow = AYUVToVURow_Any_SSE2;
+    AYUVToYRow = AYUVToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      AYUVToVURow = AYUVToVURow_SSE2;
+      AYUVToYRow = AYUVToYRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_AYUVTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    AYUVToVURow = AYUVToVURow_Any_AVX2;
+    AYUVToYRow = AYUVToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      AYUVToVURow = AYUVToVURow_AVX2;
+      AYUVToYRow = AYUVToYRow_AVX2;
+    }
+  }
+#endif
+
+#if defined(HAS_AYUVTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    AYUVToYRow = AYUVToYRow_Any_NEON;
+    AYUVToVURow = AYUVToVURow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      AYUVToYRow = AYUVToYRow_NEON;
+      AYUVToVURow = AYUVToVURow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    AYUVToVURow(src_ayuv, src_stride_ayuv, dst_vu, width);
+    AYUVToYRow(src_ayuv, dst_y, width);
+    AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
+    src_ayuv += src_stride_ayuv * 2;
+    dst_y += dst_stride_y * 2;
+    dst_vu += dst_stride_vu;
+  }
+  if (height & 1) {
+    AYUVToVURow(src_ayuv, 0, dst_vu, width);
+    AYUVToYRow(src_ayuv, dst_y, width);
+  }
+  return 0;
+}
+
 // Convert ARGB to I420.
 LIBYUV_API
 int ARGBToI420(const uint8_t* src_argb,
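A detail worth calling out in AYUVToNV21 above: when height is odd, the trailing row calls AYUVToVURow with a source stride of 0, so the 2x2 filter averages the last row with itself and degenerates to a rounded horizontal average. (p0 + p1 + p0 + p1 + 2) >> 2 equals (p0 + p1 + 1) >> 1 for all byte values, as this small standalone check confirms:

/* Verifies the stride-0 degenerate case of the 2x2 rounded average. */
#include <assert.h>

int main(void) {
  for (int p0 = 0; p0 < 256; ++p0) {
    for (int p1 = 0; p1 < 256; ++p1) {
      assert(((p0 + p1 + p0 + p1 + 2) >> 2) == ((p0 + p1 + 1) >> 1));
    }
  }
  return 0;
}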
@@ -2165,6 +2235,7 @@ int Android420ToI420(const uint8_t* src_y,
   return 0;
 }
 
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
@@ -1998,6 +1998,56 @@ int NV21ToRAW(const uint8_t* src_y,
                          dst_stride_raw, &kYvuI601Constants, width, height);
 }
 
+// Convert NV21 to YUV24
+int NV21ToYUV24(const uint8_t* src_y,
+                int src_stride_y,
+                const uint8_t* src_vu,
+                int src_stride_vu,
+                uint8_t* dst_yuv24,
+                int dst_stride_yuv24,
+                int width,
+                int height) {
+  int y;
+  void (*NV21ToYUV24Row)(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width) = NV21ToYUV24Row_C;
+  if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24;
+    dst_stride_yuv24 = -dst_stride_yuv24;
+  }
+#if defined(HAS_NV21TOYUV24ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    NV21ToYUV24Row = NV21ToYUV24Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      NV21ToYUV24Row = NV21ToYUV24Row_NEON;
+    }
+  }
+#endif
+#if defined(HAS_NV21TOYUV24ROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    NV21ToYUV24Row = NV21ToYUV24Row_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      NV21ToYUV24Row = NV21ToYUV24Row_AVX2;
+    }
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    NV21ToYUV24Row(src_y, src_vu, dst_yuv24, width);
+    dst_yuv24 += dst_stride_yuv24;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_vu += src_stride_vu;
+    }
+  }
+  return 0;
+}
+
 // Convert M420 to ARGB.
 LIBYUV_API
 int M420ToARGB(const uint8_t* src_m420,
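The loop above encodes the 4:2:0 to 4:4:4 chroma upsampling: src_vu advances only after odd-numbered rows, so each VU row serves two output rows, and NV21ToYUV24Row_C (later in this diff) repeats each VU pair across two adjacent pixels. Chroma is therefore replicated, not interpolated. A sketch of which VU pair output pixel (x, y) reads, under that reading of the code:

#include <stdint.h>

/* Sketch: source VU byte pair used for output pixel (x, y) in NV21ToYUV24,
 * given the row reuse in the loop and the pair repeat in the row function. */
static const uint8_t* VuForPixel(const uint8_t* src_vu, int src_stride_vu,
                                 int x, int y) {
  return src_vu + (y / 2) * src_stride_vu + (x / 2) * 2; /* nearest neighbor */
}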
@@ -286,7 +286,12 @@ ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
 #ifdef HAS_MERGEUVROW_MMI
 ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
 #endif
-
+#ifdef HAS_NV21TOYUV24ROW_NEON
+ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
+#endif
+#ifdef HAS_NV21TOYUV24ROW_AVX2
+ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
+#endif
 // Math functions.
 #ifdef HAS_ARGBMULTIPLYROW_SSE2
 ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
@@ -702,6 +707,10 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
 #ifdef HAS_UYVYTOYROW_MMI
 ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
 #endif
+
+#ifdef HAS_AYUVTOYROW_NEON
+ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
+#endif
 #ifdef HAS_RGB24TOARGBROW_NEON
 ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
 #endif
@@ -1381,6 +1390,36 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
 #endif
 #undef ANY12S
 
+// Any 1 to 1 with source stride (2 rows of source). Outputs UV plane.
+// 128 byte row allows for 32 avx ARGB pixels.
+#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
+  void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu,   \
+               int width) {                                                   \
+    SIMD_ALIGNED(uint8_t temp[128 * 3]);                                      \
+    memset(temp, 0, 128 * 2); /* for msan */                                  \
+    int r = width & MASK;                                                     \
+    int n = width & ~MASK;                                                    \
+    if (n > 0) {                                                              \
+      ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n);                           \
+    }                                                                         \
+    memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP);       \
+    memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP,       \
+           SS(r, UVSHIFT) * BPP);                                             \
+    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */  \
+      memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP,  \
+             BPP);                                                            \
+      memcpy(temp + 128 + SS(r, UVSHIFT) * BPP,                               \
+             temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP);                   \
+    }                                                                         \
+    ANY_SIMD(temp, 128, temp + 256, MASK + 1);                                \
+    memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2);                  \
+  }
+
+#ifdef HAS_AYUVTOVUROW_NEON
+ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
+#endif
+#undef ANY11S
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
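ANY11S is the standard libyuv any-width wrapper adapted to a kernel that reads two source rows and writes a half-width VU row: run the SIMD kernel over the largest multiple of MASK + 1 pixels, stage the ragged tails of both rows in a zeroed aligned temp at offsets 0 and 128 with the output at 256, run the kernel once on the staged block, and copy out only the valid tail. A simplified, single-row rendition of the same idea with hypothetical names:

#include <stdint.h>
#include <string.h>

/* Hypothetical kernel stand-in: assume it requires width in blocks of 16. */
static void AddOneRow_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = (uint8_t)(src[x] + 1);
}

/* Any-width wrapper in the style of the ANY11S macro (single-row variant). */
static void AddOneRow_Any(const uint8_t* src, uint8_t* dst, int width) {
  uint8_t temp[128 * 2];
  memset(temp, 0, sizeof(temp));    /* avoid reading uninitialized bytes */
  int r = width & 15;               /* ragged tail */
  int n = width & ~15;              /* largest multiple of 16 */
  if (n > 0) {
    AddOneRow_SIMD(src, dst, n);    /* fast path on the aligned part */
  }
  memcpy(temp, src + n, (size_t)r); /* stage the tail in a safe buffer */
  AddOneRow_SIMD(temp, temp + 128, 16); /* kernel always sees a full block */
  memcpy(dst + n, temp + 128, (size_t)r); /* copy only the valid tail out */
}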
@@ -3231,6 +3231,73 @@ void GaussCol_C(const uint16_t* src0,
   }
 }
 
+// Convert biplanar NV21 to packed YUV24
+void NV21ToYUV24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_vu,
+                      uint8_t* dst_yuv24,
+                      int width) {
+
+  int x;
+  for (x = 0; x < width - 1; x += 2) {
+    dst_yuv24[0] = src_vu[0];  // V
+    dst_yuv24[1] = src_vu[1];  // U
+    dst_yuv24[2] = src_y[0];   // Y0
+    dst_yuv24[3] = src_vu[0];  // V
+    dst_yuv24[4] = src_vu[1];  // U
+    dst_yuv24[5] = src_y[1];   // Y1
+    src_y += 2;
+    src_vu += 2;
+    dst_yuv24 += 6;  // Advance 2 pixels.
+  }
+  if (width & 1) {
+    dst_yuv24[0] = src_vu[0];  // V
+    dst_yuv24[1] = src_vu[1];  // U
+    dst_yuv24[2] = src_y[0];   // Y0
+  }
+}
+
+// Filter 2 rows of AYUV UV's (444) into VU (420).
+void AYUVToVURow_C(const uint8_t* src_ayuv,
+                   int src_stride_ayuv,
+                   uint8_t* dst_vu,
+                   int width) {
+  // Output a row of VU values, filtering 2x2 rows of AYUV.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] + src_ayuv[src_stride_ayuv + 4] + 2) >> 2;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] + src_ayuv[src_stride_ayuv + 5] + 2) >> 2;
+    src_ayuv += 8;
+    dst_vu += 2;
+  }
+  if (width & 1) {
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + src_ayuv[src_stride_ayuv + 0] + 2) >> 2;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + src_ayuv[src_stride_ayuv + 1] + 2) >> 2;
+  }
+}
+
+// Copy row of AYUV Y's into Y
+void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+  // Output a row of Y values.
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_y[x] = src_ayuv[2];  // v,u,y,a
+    src_ayuv += 4;
+  }
+}
+
+// divide values by weights and provide mask to indicate weight of 0.
+void FloatDivToByteRow_C(const float* src_weights,
+                         const float* src_values,
+                         uint8_t* dst_out,
+                         uint8_t* dst_mask,
+                         int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    dst_out[x] = Clamp(src_values[x] / src_weights[x]);
+    dst_mask[x] = src_weights[x] > 0 ? 0 : 0xff;
+  }
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
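The +2 bias in AYUVToVURow_C's (a + b + c + d + 2) >> 2 is the usual rounding term for a 2x2 box filter: for samples 1, 2, 3, 4 the rounded average is (1 + 2 + 3 + 4 + 2) >> 2 = 12 >> 2 = 3, whereas plain truncation would give 10 >> 2 = 2. The NEON versions below get the same rounding from vqrshrun/uqrshrn, which shift right by 2 with rounding and saturate to bytes.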
@@ -6669,6 +6669,121 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
 }
 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3
 
+#ifdef HAS_NV21TOYUV24ROW_AVX2
+
+// begin NV21ToYUV24Row_C avx2 constants
+static const ulvec8 kBLEND0 = {0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, 0x00,
+                               0x80, 0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80,
+                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
+                               0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00};
+
+static const ulvec8 kBLEND1 = {0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
+                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80};
+
+static const ulvec8 kBLEND2 = {0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+                               0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
+                               0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
+                               0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00};
+
+static const ulvec8 kSHUF0 = {0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
+                              0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05,
+                              0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
+                              0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05};
+
+static const ulvec8 kSHUF1 = {0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
+                              0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80,
+                              0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
+                              0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80};
+
+static const ulvec8 kSHUF2 = {0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
+                              0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f,
+                              0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
+                              0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f};
+
+static const ulvec8 kSHUF3 = {0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
+                              0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80,
+                              0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
+                              0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80};
+
+static const ulvec8 kSHUF4 = {0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
+                              0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a,
+                              0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
+                              0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a};
+
+static const ulvec8 kSHUF5 = {0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
+                              0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80,
+                              0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
+                              0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80};
+
+// NV21ToYUV24Row_AVX2
+void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width) {
+  uint8_t* src_y_ptr;
+  uint64_t src_offset = 0;
+  uint64_t width64;
+
+  width64 = width;
+  src_y_ptr = (uint8_t*)src_y;
+
+  asm volatile(
+      "vmovdqu    %5, %%ymm0            \n"  // init blend value
+      "vmovdqu    %6, %%ymm1            \n"  // init blend value
+      "vmovdqu    %7, %%ymm2            \n"  // init blend value
+      // "sub       $0x20, %3            \n"  // sub 32 from width for final loop
+
+      LABELALIGN
+      "1:                               \n"  // label 1
+      "vmovdqu    (%0,%4), %%ymm3       \n"  // src_y
+      "vmovdqu    1(%1,%4), %%ymm4      \n"  // src_uv+1
+      "vmovdqu    (%1), %%ymm5          \n"  // src_uv
+      "vpshufb    %8, %%ymm3, %%ymm13   \n"  // y, kSHUF0 for shuf
+      "vpshufb    %9, %%ymm4, %%ymm14   \n"  // uv+1, kSHUF1 for shuf
+      "vpshufb    %10, %%ymm5, %%ymm15  \n"  // uv, kSHUF2 for shuf
+      "vpshufb    %11, %%ymm3, %%ymm3   \n"  // y kSHUF3 for shuf
+      "vpshufb    %12, %%ymm4, %%ymm4   \n"  // uv+1 kSHUF4 for shuf
+      "vpblendvb  %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n"  // blend 0
+      "vpblendvb  %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n"  // blend 0
+      "vpblendvb  %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n"  // blend 2
+      "vpblendvb  %%ymm1, %%ymm15, %%ymm14, %%ymm13 \n"  // blend 1
+      "vpshufb    %13, %%ymm5, %%ymm15  \n"  // shuffle const
+      "vpor       %%ymm4, %%ymm3, %%ymm5 \n"  // get results
+      "vmovdqu    %%ymm12, 0x20(%2)     \n"  // store dst_yuv+20h
+      "vpor       %%ymm15, %%ymm5, %%ymm3 \n"  // get results
+      "add        $0x20, %4             \n"  // add to src buffer ptr
+      "vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n"  // insert
+      "vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n"  // insert
+      "vmovdqu    %%ymm4, (%2)          \n"  // store dst_yuv
+      "vmovdqu    %%ymm5, 0x40(%2)      \n"  // store dst_yuv+40h
+      "add        $0x60,%2              \n"  // add to dst buffer ptr
+      // "cmp       %3, %4               \n"  // (width64 - 32 bytes) and src_offset
+      "sub        $0x20,%3              \n"  // 32 pixels per loop
+      "jg         1b                    \n"
+      "vzeroupper                       \n"  // sse-avx2 transistions
+      : "+r"(src_y),      //%0
+        "+r"(src_vu),     //%1
+        "+r"(dst_yuv24),  //%2
+        "+r"(width64),    //%3
+        "+r"(src_offset)  //%4
+      : "m"(kBLEND0),     //%5
+        "m"(kBLEND1),     //%6
+        "m"(kBLEND2),     //%7
+        "m"(kSHUF0),      //%8
+        "m"(kSHUF1),      //%9
+        "m"(kSHUF2),      //%10
+        "m"(kSHUF3),      //%11
+        "m"(kSHUF4),      //%12
+        "m"(kSHUF5)       //%13
+      : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12",
+        "xmm13", "xmm14", "xmm15");
+}
+#endif  // HAS_NV21TOYUV24ROW_AVX2
+
 #endif  // defined(__x86_64__) || defined(__i386__)
 
 #ifdef __cplusplus
@@ -2685,6 +2685,77 @@ void ByteToFloatRow_NEON(const uint8_t* src,
       : "cc", "memory", "q1", "q2", "q3");
 }
 
+// Convert biplanar NV21 to packed YUV24
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width) {
+  asm volatile(
+      "1:                              \n"
+      "vld1.8     {q2}, [%0]!          \n"  // load 16 Y values
+      "vld2.8     {d0, d2}, [%1]!      \n"  // load 8 VU values
+      "vmov       d1, d0               \n"
+      "vzip.u8    d0, d1               \n"  // VV
+      "vmov       d3, d2               \n"
+      "vzip.u8    d2, d3               \n"  // UU
+      "subs       %3, %3, #16          \n"  // 16 pixels per loop
+      "vst3.8     {d0, d2, d4}, [%2]!  \n"  // store 16 YUV pixels
+      "vst3.8     {d1, d3, d5}, [%2]!  \n"
+      "bgt        1b                   \n"
+      : "+r"(src_y),      // %0
+        "+r"(src_vu),     // %1
+        "+r"(dst_yuv24),  // %2
+        "+r"(width)       // %3
+      :
+      : "cc", "memory", "q0", "q1", "q2");
+}
+
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+                      int src_stride_ayuv,
+                      uint8_t* dst_vu,
+                      int width) {
+  asm volatile(
+      "add        %1, %0, %1           \n"  // src_stride + src_AYUV
+      "1:                              \n"
+      "vld4.8     {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels.
+      "vld4.8     {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels.
+      "vpaddl.u8  q0, q0               \n"  // V 16 bytes -> 8 shorts.
+      "vpaddl.u8  q1, q1               \n"  // U 16 bytes -> 8 shorts.
+      "vld4.8     {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV pixels.
+      "vld4.8     {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV pixels.
+      "vpadal.u8  q0, q4               \n"  // B 16 bytes -> 8 shorts.
+      "vpadal.u8  q1, q5               \n"  // G 16 bytes -> 8 shorts.
+      "vqrshrun.s16 d0, q0, #2         \n"  // 2x2 average
+      "vqrshrun.s16 d1, q1, #2         \n"
+      "subs       %3, %3, #16          \n"  // 16 processed per loop.
+      "vst2.8     {d0, d1}, [%2]!      \n"  // store 8 pixels VU.
+      "bgt        1b                   \n"
+      : "+r"(src_ayuv),         // %0
+        "+r"(src_stride_ayuv),  // %1
+        "+r"(dst_vu),           // %2
+        "+r"(width)             // %3
+      :
+      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"
+  );
+}
+
+// Copy row of AYUV Y's into Y.
+// Similar to ARGBExtractAlphaRow_NEON
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+  asm volatile(
+      "1:                              \n"
+      "vld4.8     {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels
+      "vld4.8     {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels
+      "subs       %2, %2, #16          \n"  // 16 processed per loop
+      "vst1.8     {q2}, [%1]!          \n"  // store 16 Y's.
+      "bgt        1b                   \n"
+      : "+r"(src_ayuv),  // %0
+        "+r"(dst_y),     // %1
+        "+r"(width)      // %2
+      :
+      : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
 
 #ifdef __cplusplus
@@ -2876,6 +2876,113 @@ void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
       : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
 }
 
+// Convert biplanar NV21 to packed YUV24
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+                         const uint8_t* src_vu,
+                         uint8_t* dst_yuv24,
+                         int width) {
+  asm volatile(
+      "1:                                          \n"
+      "ld1        {v2.16b}, [%0], #16              \n"  // load 16 Y values
+      "ld2        {v0.8b, v1.8b}, [%1], #16        \n"  // load 8 VU values
+      "zip1       v0.16b, v0.16b, v0.16b           \n"  // replicate V values
+      "zip1       v1.16b, v1.16b, v1.16b           \n"  // replicate U values
+      "subs       %w3, %w3, #16                    \n"  // 16 pixels per loop
+      "st3        {v0.16b,v1.16b,v2.16b}, [%2], #48 \n"  // store 16 YUV pixels
+      "b.gt       1b                               \n"
+      : "+r"(src_y),      // %0
+        "+r"(src_vu),     // %1
+        "+r"(dst_yuv24),  // %2
+        "+r"(width)       // %3
+      :
+      : "cc", "memory", "v0", "v1", "v2");
+}
+
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+                      int src_stride_ayuv,
+                      uint8_t* dst_vu,
+                      int width) {
+  const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
+  asm volatile(
+
+      "1:                                          \n"
+      "ld4        {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels.
+      "uaddlp     v0.8h, v0.16b                    \n"  // V 16 bytes -> 8 shorts.
+      "uaddlp     v1.8h, v1.16b                    \n"  // U 16 bytes -> 8 shorts.
+      "ld4        {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n"  // load next 16
+      "uadalp     v0.8h, v4.16b                    \n"  // V 16 bytes -> 8 shorts.
+      "uadalp     v1.8h, v5.16b                    \n"  // U 16 bytes -> 8 shorts.
+      "uqrshrn    v0.8b, v0.8h, #2                 \n"  // 2x2 average
+      "uqrshrn    v1.8b, v1.8h, #2                 \n"
+      "subs       %w3, %w3, #16                    \n"  // 16 processed per loop.
+      "st2        {v0.8b,v1.8b}, [%2], #16         \n"  // store 8 pixels VU.
+      "b.gt       1b                               \n"
+      : "+r"(src_ayuv),    // %0
+        "+r"(src_ayuv_1),  // %1
+        "+r"(dst_vu),      // %2
+        "+r"(width)        // %3
+      :
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
+  );
+}
+
+// Copy row of AYUV Y's into Y
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+  asm volatile(
+      "1:                                          \n"
+      "ld4        {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels
+      "subs       %w2, %w2, #16                    \n"  // 16 pixels per loop
+      "st1        {v2.16b}, [%1], #16              \n"  // store 16 Y pixels
+      "b.gt       1b                               \n"
+      : "+r"(src_ayuv),  // %0
+        "+r"(dst_y),     // %1
+        "+r"(width)      // %2
+      :
+      : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+void FloatDivToByteRow_NEON(const float* src_weights,
+                            const float* src_values,
+                            uint8_t* dst_out,
+                            uint8_t* dst_mask,
+                            int width) {
+  asm volatile(
+      "movi       v0.4s, #0                        \n"
+
+      "1:                                          \n"
+      "ld1        {v1.4s,v2.4s}, [%0], #32         \n"  // load 8 float weights
+      "ld1        {v3.4s,v4.4s}, [%1], #32         \n"  // load 8 float values
+      "subs       %w4, %w4, #8                     \n"  // 8 pixels per loop
+
+      "fdiv       v1.4s, v3.4s, v1.4s              \n"  // values / weights
+      "fdiv       v2.4s, v4.4s, v2.4s              \n"
+
+      "fcvtzu     v1.4s, v1.4s                     \n"  // float to int
+      "fcvtzu     v2.4s, v2.4s                     \n"  // float to int
+      "uqxtn      v1.4h, v1.4s                     \n"  // 8 shorts
+      "uqxtn2     v1.8h, v2.4s                     \n"
+      "uqxtn      v1.8b, v1.8h                     \n"  // 8 bytes
+
+      "st1        {v1.8b}, [%2], #8                \n"  // store 8 byte out
+
+      "fcmgt      v5.4s, v1.4s, v0.4s              \n"  // cmp weight to zero
+      "fcmgt      v6.4s, v2.4s, v0.4s              \n"
+      "uqxtn      v5.4h, v5.4s                     \n"  // 8 shorts
+      "uqxtn2     v5.8h, v6.4s                     \n"
+      "uqxtn      v5.8b, v1.8h                     \n"  // 8 bytes
+
+      "st1        {v5.8b}, [%3], #8                \n"  // store 8 byte mask
+
+      "b.gt       1b                               \n"
+      : "+r"(src_weights),  // %0
+        "+r"(src_values),   // %1
+        "+r"(dst_out),      // %2
+        "+r"(dst_mask),     // %3
+        "+r"(width)         // %4
+      :
+      : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
 
 #ifdef __cplusplus
@@ -680,7 +680,7 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
 TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
 
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,\
                          W1280, DIFF, N, NEG, OFF)                       \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                  \
     const int kWidth = ((W1280) > 0) ? (W1280) : 1;                      \
@@ -716,9 +716,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
     align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight);          \
     memset(dst_argb32_c, 2, kWidth * 4 * kHeight);                        \
     memset(dst_argb32_opt, 102, kWidth * 4 * kHeight);                    \
-    FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
+    FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
                   kHeight);                                               \
-    FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
+    FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
                   kHeight);                                               \
     int max_diff = 0;                                                     \
     for (int i = 0; i < kHeight; ++i) {                                   \
@@ -740,25 +740,27 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
     free_aligned_buffer_page_end(dst_argb32_opt);                         \
   }
 
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,            \
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C,   \
+                        BPP_B, DIFF)                                      \
+  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
                    benchmark_width_ - 4, DIFF, _Any, +, 0)                \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,        \
+  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
                    benchmark_width_, DIFF, _Unaligned, +, 1)              \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,        \
+  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
                    benchmark_width_, DIFF, _Invert, -, 0)                 \
-  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,        \
+  TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
                    benchmark_width_, DIFF, _Opt, +, 0)
 
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
+TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
+TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
+TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
 
 #ifdef DO_THREE_PLANES
 // Do 3 allocations for yuv. conventional but slower.
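A note on the FMT_C parameter added above: TESTBIPLANARTOBI benchmarks FMT_PLANAR##To##FMT_B, but verifies by converting both the C and optimized outputs to ARGB with FMT_C##ToARGB and diffing those. For the existing formats FMT_C is simply FMT_B again; the one new pairing is TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2), which checks NV21ToYUV24 by reinterpreting the 3-byte-per-pixel YUV24 output through RAWToARGB, presumably because no YUV24ToARGB exists and RAW shares the 3-byte packing, making the comparison byte-exact even though the resulting ARGB values are not colorimetrically meaningful.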
@@ -978,6 +980,7 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
 TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
 TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
 TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
 
 #define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
                   HEIGHT_B, W1280, DIFF, N, NEG, OFF)                       \
@@ -3267,4 +3267,85 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
   EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
 }
 
+float TestFloatDivToByte(int benchmark_width,
+                         int benchmark_height,
+                         int benchmark_iterations,
+                         float scale,
+                         bool opt) {
+  int i, j;
+  // NEON does multiple of 8, so round count up
+  const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
+  align_buffer_page_end(src_weights, kPixels * 4);
+  align_buffer_page_end(src_values, kPixels * 4);
+  align_buffer_page_end(dst_out_c, kPixels);
+  align_buffer_page_end(dst_out_opt, kPixels);
+  align_buffer_page_end(dst_mask_c, kPixels);
+  align_buffer_page_end(dst_mask_opt, kPixels);
+
+  // Randomize works but may contain some denormals affecting performance.
+  // MemRandomize(orig_y, kPixels * 4);
+  // large values are problematic.  audio is really -1 to 1.
+  for (i = 0; i < kPixels; ++i) {
+    (reinterpret_cast<float*>(src_weights))[i] = scale;
+    (reinterpret_cast<float*>(src_values))[i] =
+        sinf(static_cast<float>(i) * 0.1f);
+  }
+  memset(dst_out_c, 0, kPixels);
+  memset(dst_out_opt, 1, kPixels);
+  memset(dst_mask_c, 2, kPixels);
+  memset(dst_mask_opt, 3, kPixels);
+
+  FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                      reinterpret_cast<float*>(src_values), dst_out_c,
+                      dst_mask_c, kPixels);
+
+  for (j = 0; j < benchmark_iterations; j++) {
+    if (opt) {
+#ifdef HAS_FLOATDIVTOBYTEROW_NEON
+      FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
+                             reinterpret_cast<float*>(src_values), dst_out_opt,
+                             dst_mask_opt, kPixels);
+#else
+      FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
+#endif
+    } else {
+      FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
+    }
+  }
+
+  uint8_t max_diff = 0;
+  for (i = 0; i < kPixels; ++i) {
+    uint8_t abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
+                       abs(dst_mask_c[i] - dst_mask_opt[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(src_weights);
+  free_aligned_buffer_page_end(src_values);
+  free_aligned_buffer_page_end(dst_out_c);
+  free_aligned_buffer_page_end(dst_out_opt);
+  free_aligned_buffer_page_end(dst_mask_c);
+  free_aligned_buffer_page_end(dst_mask_opt);
+
+  return max_diff;
+}
+
+TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
+  float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, 1.2f, false);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
+  float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, 1.2f, true);
+  EXPECT_EQ(0, diff);
+}
+
 }  // namespace libyuv