Mirror of https://chromium.googlesource.com/libyuv/libyuv
Synced 2025-12-07 09:16:48 +08:00
Add AYUVToNV12 and NV21ToNV12
BUG=libyuv:832
TESTED=out/Release/libyuv_unittest --gtest_filter=*ToNV12* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
R=rrwinterton@gmail.com

Change-Id: Id03b4613211fb6a6e163d10daa7c692fe31e36d8
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1560080
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Parent: 4bd08cbc0e
Commit: 413a8d8041
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1725
+Version: 1727
 License: BSD
 License File: LICENSE

@@ -226,6 +226,17 @@ int UYVYToI420(const uint8_t* src_uyvy,
                int width,
                int height);

+// Convert AYUV to NV12.
+LIBYUV_API
+int AYUVToNV12(const uint8_t* src_ayuv,
+               int src_stride_ayuv,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height);
+
 // Convert AYUV to NV21.
 LIBYUV_API
 int AYUVToNV21(const uint8_t* src_ayuv,

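Note: the new AYUVToNV12 entry point follows the same parameter pattern as the existing AYUVToNV21 next to it. A minimal calling sketch (the function name ConvertFrame and the buffer handling are illustrative, not part of the commit; AYUV here is the 4-bytes-per-pixel V,U,Y,A layout the row kernels later in this diff assume):

#include <stdint.h>
#include <vector>
#include "libyuv/convert.h"

// Hypothetical caller: convert one AYUV frame to NV12.
void ConvertFrame(const uint8_t* ayuv, int width, int height) {
  std::vector<uint8_t> y(width * height);
  // NV12 chroma plane: one interleaved UV byte pair per 2x2 block.
  std::vector<uint8_t> uv(((width + 1) / 2) * 2 * ((height + 1) / 2));
  libyuv::AYUVToNV12(ayuv, width * 4,  // AYUV is 4 bytes per pixel.
                     y.data(), width,  // Y plane, stride == width.
                     uv.data(), ((width + 1) / 2) * 2,  // UV stride in bytes.
                     width, height);
}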
@@ -224,6 +224,19 @@ int UYVYToNV12(const uint8_t* src_uyvy,
                int width,
                int height);

+// Convert NV21 to NV12.
+LIBYUV_API
+int NV21ToNV12(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_vu,
+               int src_stride_vu,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height);
+
 LIBYUV_API
 int YUY2ToY(const uint8_t* src_yuy2,
             int src_stride_yuy2,

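Note: NV21 and NV12 differ only in the byte order of the interleaved chroma plane, so NV21ToNV12 amounts to a VU-to-UV byte swap plus an optional Y copy (the implementation later in this diff skips the copy when dst_y is NULL). A hypothetical calling sketch (names and buffers are illustrative):

#include <stdint.h>
#include <vector>
#include "libyuv/planar_functions.h"

// Hypothetical caller: repack an NV21 frame as NV12.
void RepackNv21ToNv12(const uint8_t* y, const uint8_t* vu, int width,
                      int height) {
  const int halfwidth = (width + 1) / 2;
  const int halfheight = (height + 1) / 2;
  std::vector<uint8_t> dst_y(width * height);
  std::vector<uint8_t> dst_uv(halfwidth * 2 * halfheight);
  libyuv::NV21ToNV12(y, width,           // source Y plane.
                     vu, halfwidth * 2,  // source interleaved VU plane.
                     dst_y.data(), width, dst_uv.data(), halfwidth * 2,
                     width, height);
}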
@@ -332,6 +332,7 @@ extern "C" {
 #define HAS_ARGBTOUVROW_NEON
 #define HAS_ARGBTOYJROW_NEON
 #define HAS_ARGBTOYROW_NEON
+#define HAS_AYUVTOUVROW_NEON
 #define HAS_AYUVTOVUROW_NEON
 #define HAS_AYUVTOYROW_NEON
 #define HAS_BGRATOUVROW_NEON

@@ -375,6 +376,7 @@ extern "C" {
 #define HAS_SETROW_NEON
 #define HAS_SPLITRGBROW_NEON
 #define HAS_SPLITUVROW_NEON
+#define HAS_UVToVUROW_NEON
 #define HAS_UYVYTOARGBROW_NEON
 #define HAS_UYVYTOUV422ROW_NEON
 #define HAS_UYVYTOUVROW_NEON

@@ -3370,17 +3372,34 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
                             uint8_t* dst_u,
                             uint8_t* dst_v,
                             int width);
+void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void UVToVURow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
 void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
-void AYUVToVURow_C(const uint8_t* src_ayuv, int stride_ayuv,
+void AYUVToUVRow_C(const uint8_t* src_ayuv,
+                   int stride_ayuv,
+                   uint8_t* dst_uv,
+                   int width);
+void AYUVToVURow_C(const uint8_t* src_ayuv,
+                   int stride_ayuv,
                    uint8_t* dst_vu,
                    int width);
 void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
-void AYUVToVURow_NEON(const uint8_t* src_ayuv, int stride_ayuv,
+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+                      int stride_ayuv,
+                      uint8_t* dst_uv,
+                      int width);
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+                      int stride_ayuv,
                       uint8_t* dst_vu,
                       int width);
 void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
-void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv, int stride_ayuv,
+void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
+                          int stride_ayuv,
+                          uint8_t* dst_uv,
+                          int width);
+void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
+                          int stride_ayuv,
                           uint8_t* dst_vu,
                           int width);

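Note for readers new to libyuv's row layer: every kernel comes in a portable _C version, a SIMD version (_NEON here), and an _Any_ wrapper that copes with widths that are not a multiple of the SIMD block size. A sketch of the dispatch idiom, mirroring what AYUVToNV12 does later in this diff (assumes libyuv's internal row.h and cpu_id.h; the helper name is hypothetical):

#include "libyuv/cpu_id.h"
#include "libyuv/row.h"

// Pick the fastest available AYUVToUVRow variant for a given width.
typedef void (*AyuvToUvRowFn)(const uint8_t* src_ayuv, int src_stride_ayuv,
                              uint8_t* dst_uv, int width);

AyuvToUvRowFn PickAyuvToUvRow(int width) {
  AyuvToUvRowFn row = AYUVToUVRow_C;  // portable fallback
#if defined(HAS_AYUVTOUVROW_NEON)
  if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON)) {
    row = AYUVToUVRow_Any_NEON;  // any width; SIMD body plus safe tail
    if (IS_ALIGNED(width, 16)) {
      row = AYUVToUVRow_NEON;  // full-speed path, 16 pixels per iteration
    }
  }
#endif
  return row;
}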
@@ -4010,7 +4029,6 @@ void FloatDivToByteRow_NEON(const float* src_weights,
                             uint8_t* dst_mask,
                             int width);

-
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv

@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1725
+#define LIBYUV_VERSION 1727

 #endif  // INCLUDE_LIBYUV_VERSION_H_

@@ -880,6 +880,75 @@ int UYVYToI420(const uint8_t* src_uyvy,
   return 0;
 }

+// Convert AYUV to NV12.
+LIBYUV_API
+int AYUVToNV12(const uint8_t* src_ayuv,
+               int src_stride_ayuv,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height) {
+  int y;
+  void (*AYUVToUVRow)(const uint8_t* src_ayuv, int src_stride_ayuv,
+                      uint8_t* dst_uv, int width) = AYUVToUVRow_C;
+  void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
+      AYUVToYRow_C;
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
+    src_stride_ayuv = -src_stride_ayuv;
+  }
+  // place holders for future intel code
+#if defined(HAS_AYUVTOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    AYUVToUVRow = AYUVToUVRow_Any_SSE2;
+    AYUVToYRow = AYUVToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      AYUVToUVRow = AYUVToUVRow_SSE2;
+      AYUVToYRow = AYUVToYRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_AYUVTOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    AYUVToUVRow = AYUVToUVRow_Any_AVX2;
+    AYUVToYRow = AYUVToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      AYUVToUVRow = AYUVToUVRow_AVX2;
+      AYUVToYRow = AYUVToYRow_AVX2;
+    }
+  }
+#endif
+
+#if defined(HAS_AYUVTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    AYUVToYRow = AYUVToYRow_Any_NEON;
+    AYUVToUVRow = AYUVToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      AYUVToYRow = AYUVToYRow_NEON;
+      AYUVToUVRow = AYUVToUVRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {
+    AYUVToUVRow(src_ayuv, src_stride_ayuv, dst_uv, width);
+    AYUVToYRow(src_ayuv, dst_y, width);
+    AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
+    src_ayuv += src_stride_ayuv * 2;
+    dst_y += dst_stride_y * 2;
+    dst_uv += dst_stride_uv;
+  }
+  if (height & 1) {
+    AYUVToUVRow(src_ayuv, 0, dst_uv, width);
+    AYUVToYRow(src_ayuv, dst_y, width);
+  }
+  return 0;
+}
+
 // Convert AYUV to NV21.
 LIBYUV_API
 int AYUVToNV21(const uint8_t* src_ayuv,

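Note: the loop above is where the 4:2:0 subsampling happens: each iteration consumes two AYUV rows, writes two Y rows but only one UV row, and the trailing `if (height & 1)` reuses the final row (stride 0) for odd heights. The plane sizes this implies, as a small checked sketch (helper name is hypothetical):

#include <assert.h>
#include <stddef.h>

// NV12 frame size implied by the loop above: a full-resolution Y plane plus
// one interleaved UV byte pair per 2x2 block; odd dimensions round up.
size_t Nv12FrameSize(int width, int height) {
  size_t y_size = (size_t)width * height;
  size_t uv_size = (size_t)((width + 1) / 2) * 2 * ((height + 1) / 2);
  return y_size + uv_size;
}

int main() {
  assert(Nv12FrameSize(1280, 720) == 921600 + 460800);  // the TESTED= size
  assert(Nv12FrameSize(3, 3) == 9 + 8);  // odd dims: 2x2 chroma pairs
  return 0;
}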
@@ -892,8 +961,7 @@ int AYUVToNV21(const uint8_t* src_ayuv,
                int height) {
   int y;
   void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv,
-                      uint8_t* dst_vu, int width) =
-      AYUVToVURow_C;
+                      uint8_t* dst_vu, int width) = AYUVToVURow_C;
   void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
       AYUVToYRow_C;
   // Negative height means invert the image.

@@ -2235,7 +2303,6 @@ int Android420ToI420(const uint8_t* src_y,
   return 0;
 }

-
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv

@@ -2008,10 +2008,8 @@ int NV21ToYUV24(const uint8_t* src_y,
                 int width,
                 int height) {
   int y;
-  void (*NV21ToYUV24Row)(const uint8_t* src_y,
-                         const uint8_t* src_vu,
-                         uint8_t* dst_yuv24,
-                         int width) = NV21ToYUV24Row_C;
+  void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu,
+                         uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C;
   if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) {
     return -1;
   }

@@ -440,7 +440,6 @@ void MergeUVPlane(const uint8_t* src_u,
   int y;
   void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                      uint8_t* dst_uv, int width) = MergeUVRow_C;
-  // Coalesce rows.
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;

@@ -504,6 +503,63 @@ void MergeUVPlane(const uint8_t* src_u,
   }
 }

+// Convert NV21 to NV12.
+LIBYUV_API
+int NV21ToNV12(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_vu,
+               int src_stride_vu,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height) {
+  int y;
+  void (*UVToVURow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+      UVToVURow_C;
+
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_vu || !dst_uv || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
+    src_stride_y = -src_stride_y;
+    src_stride_vu = -src_stride_vu;
+  }
+  // Coalesce rows.
+  if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) {
+    halfwidth *= halfheight;
+    halfheight = 1;
+    src_stride_vu = dst_stride_uv = 0;
+  }
+
+#if defined(HAS_UVToVUROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    UVToVURow = UVToVURow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      UVToVURow = UVToVURow_NEON;
+    }
+  }
+#endif
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+
+  for (y = 0; y < halfheight; ++y) {
+    UVToVURow(src_vu, dst_uv, halfwidth);
+    src_vu += src_stride_vu;
+    dst_uv += dst_stride_uv;
+  }
+  return 0;
+}
+
 // Support function for NV12 etc RGB channels.
 // Width and height are plane sizes (typically half pixel width).
 LIBYUV_API

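Note: the "Coalesce rows." block is a recurring libyuv optimization: when both chroma strides equal the payload row width (halfwidth * 2 bytes), the plane is contiguous, so all rows can be processed as one long row, one row-function call instead of halfheight calls. The idea in isolation, with hypothetical names:

#include <stdint.h>

// Sketch of the row-coalescing trick (hypothetical helper, not libyuv API):
// a plane whose stride equals its row width is contiguous, so N rows of W
// bytes can be treated as one row of N*W bytes.
void ForEachRow(const uint8_t* src, int src_stride, uint8_t* dst,
                int dst_stride, int width, int height,
                void (*row_fn)(const uint8_t*, uint8_t*, int)) {
  if (src_stride == width && dst_stride == width) {
    width *= height;  // the whole plane becomes one long row
    height = 1;
    src_stride = dst_stride = 0;
  }
  for (int y = 0; y < height; ++y) {
    row_fn(src, dst, width);
    src += src_stride;
    dst += dst_stride;
  }
}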
@@ -707,10 +707,12 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
 #ifdef HAS_UYVYTOYROW_MMI
 ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
 #endif
-
 #ifdef HAS_AYUVTOYROW_NEON
 ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
 #endif
+#ifdef HAS_AYUVTOYROW_NEON
+ANY11(UVToVURow_Any_NEON, UVToVURow_NEON, 0, 2, 2, 15)
+#endif
 #ifdef HAS_RGB24TOARGBROW_NEON
 ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
 #endif

@@ -1416,6 +1418,7 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
   }

 #ifdef HAS_AYUVTOVUROW_NEON
+ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
 ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
 #endif
 #undef ANY11S

@@ -3236,7 +3236,6 @@ void NV21ToYUV24Row_C(const uint8_t* src_y,
                       const uint8_t* src_vu,
                       uint8_t* dst_yuv24,
                       int width) {
-
   int x;
   for (x = 0; x < width - 1; x += 2) {
     dst_yuv24[0] = src_vu[0];  // V

@@ -3256,6 +3255,33 @@ void NV21ToYUV24Row_C(const uint8_t* src_y,
   }
 }

+// Filter 2 rows of AYUV UV's (444) into UV (420).
+void AYUVToUVRow_C(const uint8_t* src_ayuv,
+                   int src_stride_ayuv,
+                   uint8_t* dst_uv,
+                   int width) {
+  // Output a row of UV values, filtering 2x2 rows of AYUV.
+  int x;
+  for (x = 0; x < width; x += 2) {
+    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 5] + 2) >>
+                2;
+    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 4] + 2) >>
+                2;
+    src_ayuv += 8;
+    dst_uv += 2;
+  }
+  if (width & 1) {
+    dst_uv[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 0] + 2) >>
+                2;
+    dst_uv[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 1] + 2) >>
+                2;
+  }
+}
+
 // Filter 2 rows of AYUV UV's (444) into VU (420).
 void AYUVToVURow_C(const uint8_t* src_ayuv,
                    int src_stride_ayuv,

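Note: both C filters compute each output chroma byte as a rounded 2x2 mean, (a + b + c + d + 2) >> 2, where the +2 adds half the divisor so truncation rounds to nearest. For example, averaging 100, 101, 102, 103 gives (406 + 2) >> 2 = 102, while the unrounded form would truncate 101.5 down to 101. A small self-check of that arithmetic:

#include <assert.h>
#include <stdint.h>

// The rounded 2x2 box filter used by AYUVToUVRow_C / AYUVToVURow_C above.
static uint8_t Average4(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  return (uint8_t)((a + b + c + d + 2) >> 2);
}

int main() {
  assert(Average4(100, 101, 102, 103) == 102);  // 101.5 rounds up
  assert(Average4(0, 0, 0, 1) == 0);            // 0.25 rounds down
  assert(Average4(255, 255, 255, 255) == 255);  // sums stay within int range
  return 0;
}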
@@ -3264,14 +3290,22 @@ void AYUVToVURow_C(const uint8_t* src_ayuv,
   // Output a row of VU values, filtering 2x2 rows of AYUV.
   int x;
   for (x = 0; x < width; x += 2) {
-    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] + src_ayuv[src_stride_ayuv + 4] + 2) >> 2;
-    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] + src_ayuv[src_stride_ayuv + 5] + 2) >> 2;
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 4] + 2) >>
+                2;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 5] + 2) >>
+                2;
     src_ayuv += 8;
     dst_vu += 2;
   }
   if (width & 1) {
-    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + src_ayuv[src_stride_ayuv + 0] + 2) >> 2;
-    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + src_ayuv[src_stride_ayuv + 1] + 2) >> 2;
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 0] + 2) >>
+                2;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 1] + 2) >>
+                2;
   }
 }

@@ -3285,6 +3319,18 @@ void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
   }
 }

+void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    uint8_t u = src_uv[0];
+    uint8_t v = src_uv[1];
+    dst_vu[0] = v;
+    dst_vu[1] = u;
+    src_uv += 2;
+    dst_vu += 2;
+  }
+}
+
 // divide values by weights and provide mask to indicate weight of 0.
 void FloatDivToByteRow_C(const float* src_weights,
                          const float* src_values,

@@ -6669,7 +6669,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
 }
 #endif  // HAS_ARGBLUMACOLORTABLEROW_SSSE3

-
 #ifdef HAS_NV21TOYUV24ROW_AVX2

 // begin NV21ToYUV24Row_C avx2 constants

@@ -6723,7 +6722,6 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_yuv24,
                          int width) {
-
   uint8_t* src_y_ptr;
   uint64_t src_offset = 0;
   uint64_t width64;

@@ -6743,10 +6741,13 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
       "vmovdqu 1(%1,%4), %%ymm4 \n"  // src_uv+1
       "vmovdqu (%1), %%ymm5 \n"  // src_uv
       "vpshufb %8, %%ymm3, %%ymm13 \n"  // y, kSHUF0 for shuf
-      "vpshufb %9, %%ymm4, %%ymm14 \n"  //uv+1, kSHUF1 for shuf
-      "vpshufb %10, %%ymm5, %%ymm15 \n"  //uv, kSHUF2 for shuf
+      "vpshufb %9, %%ymm4, %%ymm14 \n"  // uv+1, kSHUF1 for
+                                        // shuf
+      "vpshufb %10, %%ymm5, %%ymm15 \n"  // uv, kSHUF2 for
+                                         // shuf
       "vpshufb %11, %%ymm3, %%ymm3 \n"  // y kSHUF3 for shuf
-      "vpshufb %12, %%ymm4, %%ymm4 \n"  //uv+1 kSHUF4 for shuf
+      "vpshufb %12, %%ymm4, %%ymm4 \n"  // uv+1 kSHUF4 for
+                                        // shuf
       "vpblendvb %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n"  // blend 0
       "vpblendvb %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n"  // blend 0
       "vpblendvb %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n"  // blend 2

@@ -6755,16 +6756,20 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
       "vpor %%ymm4, %%ymm3, %%ymm5 \n"  // get results
       "vmovdqu %%ymm12, 0x20(%2) \n"  // store dst_yuv+20h
       "vpor %%ymm15, %%ymm5, %%ymm3 \n"  // get results
-      "add $0x20, %4 \n"  //add to src buffer ptr
+      "add $0x20, %4 \n"  // add to src buffer
+                          // ptr
       "vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n"  // insert
       "vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n"  // insert
       "vmovdqu %%ymm4, (%2) \n"  // store dst_yuv
       "vmovdqu %%ymm5, 0x40(%2) \n"  // store dst_yuv+40h
-      "add $0x60,%2 \n"  //add to dst buffer ptr
-      // "cmp %3, %4 \n" //(width64 - 32 bytes) and src_offset
+      "add $0x60,%2 \n"  // add to dst buffer
+                         // ptr
+      // "cmp %3, %4 \n" //(width64 -
+      // 32 bytes) and src_offset
       "sub $0x20,%3 \n"  // 32 pixels per loop
       "jg 1b \n"
-      "vzeroupper \n"  //sse-avx2 transistions
+      "vzeroupper \n"  // sse-avx2
+                       // transistions

       : "+r"(src_y),   //%0
         "+r"(src_vu),  //%1

@@ -6780,7 +6785,8 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
         "m"(kSHUF3),  //%11
         "m"(kSHUF4),  //%12
         "m"(kSHUF5)   //%13
-      : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12", "xmm13", "xmm14", "xmm15");
+      : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12",
+        "xmm13", "xmm14", "xmm15");
 }
 #endif  // HAS_NV21TOYUV24ROW_AVX2

@@ -2710,6 +2710,37 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
       : "cc", "memory", "q0", "q1", "q2");
 }

+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+                      int src_stride_ayuv,
+                      uint8_t* dst_uv,
+                      int width) {
+  asm volatile(
+      "add %1, %0, %1 \n"  // src_stride + src_AYUV
+      "1: \n"
+      "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels.
+      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV
+                                           // pixels.
+      "vpaddl.u8 q0, q0 \n"  // V 16 bytes -> 8 shorts.
+      "vpaddl.u8 q1, q1 \n"  // U 16 bytes -> 8 shorts.
+      "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV
+                                              // pixels.
+      "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV
+                                              // pixels.
+      "vpadal.u8 q0, q4 \n"  // B 16 bytes -> 8 shorts.
+      "vpadal.u8 q1, q5 \n"  // G 16 bytes -> 8 shorts.
+      "vqrshrun.s16 d1, q0, #2 \n"  // 2x2 average
+      "vqrshrun.s16 d0, q1, #2 \n"
+      "subs %3, %3, #16 \n"  // 16 processed per loop.
+      "vst2.8 {d0, d1}, [%2]! \n"  // store 8 pixels UV.
+      "bgt 1b \n"
+      : "+r"(src_ayuv),         // %0
+        "+r"(src_stride_ayuv),  // %1
+        "+r"(dst_uv),           // %2
+        "+r"(width)             // %3
+      :
+      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+}
+
 void AYUVToVURow_NEON(const uint8_t* src_ayuv,
                       int src_stride_ayuv,
                       uint8_t* dst_vu,

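Note: the ARM32 kernel builds the 2x2 average from pairwise adds: vpaddl.u8 sums horizontally adjacent bytes into 16-bit lanes, vpadal.u8 accumulates the second row on top, and vqrshrun.s16 #2 performs the rounded divide by 4 (the B/G comment labels look inherited from the ARGB kernels this code was adapted from). A rough NEON-intrinsics equivalent, a sketch assuming width is a multiple of 16 (the _Any_ wrapper covers ragged tails in libyuv; the helper name is hypothetical):

#include <arm_neon.h>
#include <stdint.h>

// Intrinsics sketch of AYUVToUVRow_NEON above.
void AyuvToUvRowSketch(const uint8_t* src_ayuv, int src_stride_ayuv,
                       uint8_t* dst_uv, int width) {
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  for (int x = 0; x < width; x += 16) {
    uint8x16x4_t p0 = vld4q_u8(src_ayuv);  // deinterleave V, U, Y, A
    uint8x16x4_t p1 = vld4q_u8(row1);
    uint16x8_t v = vpaddlq_u8(p0.val[0]);  // horizontal pair sums
    uint16x8_t u = vpaddlq_u8(p0.val[1]);
    v = vpadalq_u8(v, p1.val[0]);          // accumulate the second row
    u = vpadalq_u8(u, p1.val[1]);
    uint8x8x2_t uv;
    uv.val[0] = vrshrn_n_u16(u, 2);        // rounded >> 2, i.e. divide by 4
    uv.val[1] = vrshrn_n_u16(v, 2);
    vst2_u8(dst_uv, uv);                   // store interleaved U, V
    src_ayuv += 16 * 4;                    // 16 AYUV pixels consumed
    row1 += 16 * 4;
    dst_uv += 8 * 2;                       // 8 UV pairs produced
  }
}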
@@ -2718,11 +2749,14 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
       "add %1, %0, %1 \n"  // src_stride + src_AYUV
       "1: \n"
       "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 AYUV pixels.
-      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV pixels.
+      "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 AYUV
+                                           // pixels.
       "vpaddl.u8 q0, q0 \n"  // V 16 bytes -> 8 shorts.
       "vpaddl.u8 q1, q1 \n"  // U 16 bytes -> 8 shorts.
-      "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV pixels.
-      "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV pixels.
+      "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more AYUV
+                                              // pixels.
+      "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 AYUV
+                                              // pixels.
       "vpadal.u8 q0, q4 \n"  // B 16 bytes -> 8 shorts.
       "vpadal.u8 q1, q5 \n"  // G 16 bytes -> 8 shorts.
       "vqrshrun.s16 d0, q0, #2 \n"  // 2x2 average

@@ -2735,8 +2769,7 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
         "+r"(dst_vu),  // %2
         "+r"(width)    // %3
       :
-      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"
-  );
+      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
 }

 // Copy row of AYUV Y's into Y.

@@ -2756,6 +2789,23 @@ void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
       : "cc", "memory", "q0", "q1", "q2", "q3");
 }

+// Convert biplanar UV channel of NV12 to NV21
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+  asm volatile(
+      "1: \n"
+      "vld2.8 {d0, d2}, [%0]! \n"  // load 16 UV values
+      "vld2.8 {d1, d3}, [%0]! \n"
+      "vorr.u8 q2, q0, q0 \n"  // move U after V
+      "subs %2, %2, #16 \n"  // 16 pixels per loop
+      "vst2.8 {q1, q2}, [%1]! \n"  // store 16 VU pixels
+      "bgt 1b \n"
+      : "+r"(src_uv),  // %0
+        "+r"(dst_vu),  // %1
+        "+r"(width)    // %2
+      :
+      : "cc", "memory", "q0", "q1", "q2");
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..

 #ifdef __cplusplus

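Note: UVToVURow_NEON is a pure channel swap: vld2.8 deinterleaves so q0 holds the U bytes and q1 the V bytes, `vorr.u8 q2, q0, q0` is the idiomatic NEON register copy (OR with itself) that places U in the register vst2 stores second, and the store re-interleaves the pair as V,U. The same operation with intrinsics, a sketch assuming width is a multiple of 16 (helper name hypothetical):

#include <arm_neon.h>
#include <stdint.h>

// Intrinsics sketch of UVToVURow_NEON above.
void UvToVuRowSketch(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  for (int x = 0; x < width; x += 16) {
    uint8x16x2_t uv = vld2q_u8(src_uv);  // val[0] = U bytes, val[1] = V bytes
    uint8x16x2_t vu;
    vu.val[0] = uv.val[1];  // V first...
    vu.val[1] = uv.val[0];  // ...then U
    vst2q_u8(dst_vu, vu);   // re-interleave as VU
    src_uv += 32;           // 16 byte pairs per iteration
    dst_vu += 32;
  }
}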
@@ -2898,6 +2898,34 @@ void NV21ToYUV24Row_NEON(const uint8_t* src_y,
       : "cc", "memory", "v0", "v1", "v2");
 }

+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+                      int src_stride_ayuv,
+                      uint8_t* dst_uv,
+                      int width) {
+  const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv;
+  asm volatile(
+
+      "1: \n"
+      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16
+                                                         // pixels.
+      "uaddlp v0.8h, v0.16b \n"  // V 16 bytes -> 8 shorts.
+      "uaddlp v1.8h, v1.16b \n"  // U 16 bytes -> 8 shorts.
+      "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n"  // load next 16
+      "uadalp v0.8h, v4.16b \n"  // V 16 bytes -> 8 shorts.
+      "uadalp v1.8h, v5.16b \n"  // U 16 bytes -> 8 shorts.
+      "uqrshrn v3.8b, v0.8h, #2 \n"  // 2x2 average
+      "uqrshrn v2.8b, v1.8h, #2 \n"
+      "subs %w3, %w3, #16 \n"  // 16 processed per loop.
+      "st2 {v2.8b,v3.8b}, [%2], #16 \n"  // store 8 pixels UV.
+      "b.gt 1b \n"
+      : "+r"(src_ayuv),    // %0
+        "+r"(src_ayuv_1),  // %1
+        "+r"(dst_uv),      // %2
+        "+r"(width)        // %3
+      :
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
 void AYUVToVURow_NEON(const uint8_t* src_ayuv,
                       int src_stride_ayuv,
                       uint8_t* dst_vu,

@@ -2906,7 +2934,8 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
   asm volatile(

       "1: \n"
-      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels.
+      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16
+                                                         // pixels.
       "uaddlp v0.8h, v0.16b \n"  // V 16 bytes -> 8 shorts.
       "uaddlp v1.8h, v1.16b \n"  // U 16 bytes -> 8 shorts.
       "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n"  // load next 16

@@ -2922,15 +2951,15 @@ void AYUVToVURow_NEON(const uint8_t* src_ayuv,
         "+r"(dst_vu),  // %2
         "+r"(width)    // %3
       :
-      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
-  );
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
 }

 // Copy row of AYUV Y's into Y
 void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
   asm volatile(
       "1: \n"
-      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16 pixels
+      "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load 16
+                                                         // pixels
       "subs %w2, %w2, #16 \n"  // 16 pixels per loop
       "st1 {v2.16b}, [%1], #16 \n"  // store 16 Y pixels
       "b.gt 1b \n"

@@ -2983,6 +3012,22 @@ void FloatDivToByteRow_NEON(const float* src_weights,
       : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
 }

+// Convert biplanar UV channel of NV12 to NV21
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+  asm volatile(
+      "1: \n"
+      "ld2 {v0.16b, v1.16b}, [%0], #32 \n"  // load 16 UV values
+      "orr v2.16b, v0.16b, v0.16b \n"  // move U after V
+      "subs %w2, %w2, #16 \n"  // 16 pixels per loop
+      "st2 {v1.16b, v2.16b}, [%1], #32 \n"  // store 16 VU pixels
+      "b.gt 1b \n"
+      : "+r"(src_uv),  // %0
+        "+r"(dst_vu),  // %1
+        "+r"(width)    // %2
+      :
+      : "cc", "memory", "v0", "v1", "v2");
+}
+
 #endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

 #ifdef __cplusplus

@@ -311,10 +311,10 @@ int I400ToNV21(const uint8_t* src_y,
                             SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) +           \
                             OFF);                                         \
   align_buffer_page_end(dst_y_c, kWidth* kHeight);                        \
-  align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) *      \
+  align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *      \
                                       SUBSAMPLE(kHeight, SUBSAMP_Y));     \
   align_buffer_page_end(dst_y_opt, kWidth* kHeight);                      \
-  align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) *    \
+  align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *    \
                                         SUBSAMPLE(kHeight, SUBSAMP_Y));   \
   for (int i = 0; i < kHeight; ++i)                                       \
     for (int j = 0; j < kWidth; ++j)                                      \

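Note: the change from SUBSAMPLE(kWidth * 2, SUBSAMP_X) to SUBSAMPLE(kWidth, SUBSAMP_X) * 2 in this and the next two hunks only matters for odd widths, but there it fixes the chroma buffer math: a UV row is two bytes per rounded-up half-width column. Assuming SUBSAMPLE is the round-up division these tests use throughout, the two forms differ by exactly one byte per row:

#include <assert.h>

// SUBSAMPLE as the tests use it: round v up to a multiple of a, then divide.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))

int main() {
  const int kWidth = 1279;  // an odd width, e.g. from an _Any variant
  assert(SUBSAMPLE(kWidth * 2, 2) == 1279);  // old: rounding never fires
  assert(SUBSAMPLE(kWidth, 2) * 2 == 1280);  // new: 640 UV pairs = 1280 bytes
  return 0;
}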
@@ -329,21 +329,21 @@ int I400ToNV21(const uint8_t* src_y,
   }                                                                         \
   memset(dst_y_c, 1, kWidth* kHeight);                                      \
   memset(dst_uv_c, 2,                                                       \
-         SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
   memset(dst_y_opt, 101, kWidth* kHeight);                                  \
   memset(dst_uv_opt, 102,                                                   \
-         SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+         SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
   MaskCpuFlags(disable_cpu_flags_);                                         \
   SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
       src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X),   \
       src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth,       \
-      dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight);     \
+      dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight);     \
   MaskCpuFlags(benchmark_cpu_info_);                                        \
   for (int i = 0; i < benchmark_iterations_; ++i) {                         \
     SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
         src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
         src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth,   \
-        dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
+        dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
   }                                                                         \
   int max_diff = 0;                                                         \
   for (int i = 0; i < kHeight; ++i) {                                       \

@@ -357,12 +357,12 @@ int I400ToNV21(const uint8_t* src_y,
   }                                                                         \
   EXPECT_LE(max_diff, 1);                                                   \
   for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
-    for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) {            \
+    for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) {            \
       int abs_diff =                                                        \
           abs(static_cast<int>(                                             \
-                  dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) -     \
+                  dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) -     \
               static_cast<int>(                                             \
-                  dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]));   \
+                  dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]));   \
       if (abs_diff > max_diff) {                                            \
         max_diff = abs_diff;                                                \
       }                                                                     \

@@ -395,6 +395,99 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
 TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
 TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)

+#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,       \
+                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG,    \
+                          OFF)                                                \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) {              \
+    const int kWidth = ((W1280) > 0) ? (W1280) : 1;                           \
+    const int kHeight = benchmark_height_;                                    \
+    align_buffer_page_end(src_y, kWidth* kHeight + OFF);                      \
+    align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 *      \
+                                      SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) +     \
+                                  OFF);                                       \
+    align_buffer_page_end(dst_y_c, kWidth* kHeight);                          \
+    align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *        \
+                                        SUBSAMPLE(kHeight, SUBSAMP_Y));       \
+    align_buffer_page_end(dst_y_opt, kWidth* kHeight);                        \
+    align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 *      \
+                                          SUBSAMPLE(kHeight, SUBSAMP_Y));     \
+    for (int i = 0; i < kHeight; ++i)                                         \
+      for (int j = 0; j < kWidth; ++j)                                        \
+        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff);                    \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) {             \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) {            \
+        src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] =    \
+            (fastrand() & 0xff);                                              \
+        src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] =    \
+            (fastrand() & 0xff);                                              \
+      }                                                                       \
+    }                                                                         \
+    memset(dst_y_c, 1, kWidth* kHeight);                                      \
+    memset(dst_uv_c, 2,                                                       \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    memset(dst_y_opt, 101, kWidth* kHeight);                                  \
+    memset(dst_uv_opt, 102,                                                   \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    MaskCpuFlags(disable_cpu_flags_);                                         \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(                                           \
+        src_y + OFF, kWidth, src_uv + OFF,                                    \
+        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c,      \
+        SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight);               \
+    MaskCpuFlags(benchmark_cpu_info_);                                        \
+    for (int i = 0; i < benchmark_iterations_; ++i) {                         \
+      SRC_FMT_PLANAR##To##FMT_PLANAR(                                         \
+          src_y + OFF, kWidth, src_uv + OFF,                                  \
+          SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \
+          SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight);             \
+    }                                                                         \
+    int max_diff = 0;                                                         \
+    for (int i = 0; i < kHeight; ++i) {                                       \
+      for (int j = 0; j < kWidth; ++j) {                                      \
+        int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) -        \
+                           static_cast<int>(dst_y_opt[i * kWidth + j]));      \
+        if (abs_diff > max_diff) {                                            \
+          max_diff = abs_diff;                                                \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    EXPECT_LE(max_diff, 1);                                                   \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) {                 \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) {            \
+        int abs_diff =                                                        \
+            abs(static_cast<int>(                                             \
+                    dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) -     \
+                static_cast<int>(                                             \
+                    dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]));   \
+        if (abs_diff > max_diff) {                                            \
+          max_diff = abs_diff;                                                \
+        }                                                                     \
+      }                                                                       \
+    }                                                                         \
+    EXPECT_LE(max_diff, 1);                                                   \
+    free_aligned_buffer_page_end(dst_y_c);                                    \
+    free_aligned_buffer_page_end(dst_uv_c);                                   \
+    free_aligned_buffer_page_end(dst_y_opt);                                  \
+    free_aligned_buffer_page_end(dst_uv_opt);                                 \
+    free_aligned_buffer_page_end(src_y);                                      \
+    free_aligned_buffer_page_end(src_uv);                                     \
+  }
+
+#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,        \
+                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y)                    \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)       \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0)   \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1)  \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0)    \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
+
+// TODO(fbarchard): Fix msan on this unittest
+// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
+
 #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,        \
                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
                          DOY)                                                 \

@@ -680,8 +773,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
 TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
 TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)

-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,\
-                         W1280, DIFF, N, NEG, OFF)                         \
+#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C,   \
+                         BPP_B, W1280, DIFF, N, NEG, OFF)                  \
   TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                    \
     const int kWidth = ((W1280) > 0) ? (W1280) : 1;                        \
     const int kHeight = benchmark_height_;                                 \

@@ -740,8 +833,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
     free_aligned_buffer_page_end(dst_argb32_opt);                          \
   }

-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C,    \
-                        BPP_B, DIFF)                                       \
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+                        DIFF)                                                  \
   TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,  \
                    benchmark_width_ - 4, DIFF, _Any, +, 0)                 \
   TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B,  \

@@ -980,6 +1073,7 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
 TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
 TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
 TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
 TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)

 #define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \

@@ -1378,14 +1472,15 @@ TEST_F(LibYUVConvertTest, FuzzJpeg) {
     orig_pixels[0] = 0xff;
     orig_pixels[1] = 0xd8;  // SOI.
     orig_pixels[kSize - 1] = 0xff;
-    ValidateJpeg(orig_pixels, kSize);  // Failure normally expected.
+    ValidateJpeg(orig_pixels,
+                 kSize);  // Failure normally expected.
     free_aligned_buffer_page_end(orig_pixels);
   }
 }

-// Test data created in GIMP. In export jpeg, disable thumbnails etc,
-// choose a subsampling, and use low quality (50) to keep size small.
-// Generated with xxd -i test.jpg
+// Test data created in GIMP. In export jpeg, disable
+// thumbnails etc, choose a subsampling, and use low quality
+// (50) to keep size small. Generated with xxd -i test.jpg
 // test 0 is J400
 static const uint8_t kTest0Jpg[] = {
     0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,

@@ -1987,8 +2082,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) {
   EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
   EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
   EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
-  EXPECT_EQ(1,
-            ShowJPegInfo(kTest4Jpg, kTest4JpgLen));  // Valid but unsupported.
+  EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
+                            kTest4JpgLen));  // Valid but unsupported.
 }
 #endif  // HAVE_JPEG

@@ -2906,7 +3001,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
 }

 // Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected result.
+// Caveat: Result is near due to float rounding in expected
+// result.
 TEST_F(LibYUVConvertTest, TestH010ToAR30) {
   const int kSize = 1024;
   int histogram_b[1024];

@@ -2969,7 +3065,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
 }

 // Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected result.
+// Caveat: Result is near due to float rounding in expected
+// result.
 TEST_F(LibYUVConvertTest, TestH010ToAB30) {
   const int kSize = 1024;
   int histogram_b[1024];

@@ -3287,7 +3287,8 @@ float TestFloatDivToByte(int benchmark_width,
   // large values are problematic.  audio is really -1 to 1.
   for (i = 0; i < kPixels; ++i) {
     (reinterpret_cast<float*>(src_weights))[i] = scale;
-    (reinterpret_cast<float*>(src_values))[i] = sinf(static_cast<float>(i) * 0.1f);
+    (reinterpret_cast<float*>(src_values))[i] =
+        sinf(static_cast<float>(i) * 0.1f);
   }
   memset(dst_out_c, 0, kPixels);
   memset(dst_out_opt, 1, kPixels);

@@ -3295,24 +3296,24 @@ float TestFloatDivToByte(int benchmark_width,
   memset(dst_mask_opt, 3, kPixels);

   FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-                      reinterpret_cast<float*>(src_values),
-                      dst_out_c, dst_mask_c, kPixels);
+                      reinterpret_cast<float*>(src_values), dst_out_c,
+                      dst_mask_c, kPixels);

   for (j = 0; j < benchmark_iterations; j++) {
     if (opt) {
 #ifdef HAS_FLOATDIVTOBYTEROW_NEON
       FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
-                             reinterpret_cast<float*>(src_values),
-                             dst_out_opt, dst_mask_opt, kPixels);
+                             reinterpret_cast<float*>(src_values), dst_out_opt,
+                             dst_mask_opt, kPixels);
 #else
       FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-                          reinterpret_cast<float*>(src_values),
-                          dst_out_opt, dst_mask_opt, kPixels);
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
 #endif
     } else {
       FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
-                          reinterpret_cast<float*>(src_values),
-                          dst_out_opt, dst_mask_opt, kPixels);
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
     }
   }

@@ -3347,5 +3348,23 @@ TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
   EXPECT_EQ(0, diff);
 }

+TEST_F(LibYUVPlanarTest, UVToVURow) {
+  const int kPixels = benchmark_width_ * benchmark_height_;
+  align_buffer_page_end(src_pixels_vu, kPixels * 2);
+  align_buffer_page_end(dst_pixels_uv, kPixels * 2);
+
+  MemRandomize(src_pixels_vu, kPixels * 2);
+  memset(dst_pixels_uv, 1, kPixels * 2);
+
+  UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);
+
+  for (int i = 0; i < kPixels; ++i) {
+    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
+    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
+  }
+
+  free_aligned_buffer_page_end(src_pixels_vu);
+  free_aligned_buffer_page_end(dst_pixels_uv);
+}
+
 }  // namespace libyuv

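Note: the new test above checks a single swap against the C reference. A natural extension (hypothetical, not part of this commit) is a round-trip check: swapping VU twice must reproduce the input byte-for-byte.

// Hypothetical companion test in the same style: UVToVURow_C applied twice
// is the identity.
TEST_F(LibYUVPlanarTest, UVToVURow_RoundTrip) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(orig, kPixels * 2);
  align_buffer_page_end(tmp, kPixels * 2);
  align_buffer_page_end(back, kPixels * 2);

  MemRandomize(orig, kPixels * 2);
  UVToVURow_C(orig, tmp, kPixels);
  UVToVURow_C(tmp, back, kPixels);

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(orig[i], back[i]);
  }

  free_aligned_buffer_page_end(orig);
  free_aligned_buffer_page_end(tmp);
  free_aligned_buffer_page_end(back);
}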