mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
Add support for AR64 format
Add following conversions: ARGB,ABGR <-> AR64,AB64 AR64 <-> AB64 R=fbarchard@chromium.org Change-Id: I5ca5b40a98bffea11981e136afae4a511ba6c564 Bug: libyuv:886 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2746780 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
19bbedfd3e
commit
f37014fcff
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1780
|
||||
Version: 1781
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -54,12 +54,14 @@ The following is extracted from video_common.h as a complete list of formats sup
|
||||
// 1 Secondary YUV format: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'), // deprecated.
|
||||
|
||||
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
|
||||
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
|
||||
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
|
||||
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
|
||||
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
@ -180,6 +182,14 @@ The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
|
||||
The 10 bit RGB values range from 0 to 1023.
|
||||
XR30 is the same as AR30 but with no alpha channel.
|
||||
|
||||
# AB64 and AR64
|
||||
|
||||
AB64 is similar to ABGR, with 16 bit (2 bytes) per channel. Each channel stores an unsigned short.
|
||||
In memory R is the lowest and A is the highest.
|
||||
Each channel has value ranges from 0 to 65535.
|
||||
AR64 is similar to ARGB.
|
||||
|
||||
|
||||
# NV12 and NV21
|
||||
|
||||
NV12 is a biplanar format with a full sized Y plane followed by a single
|
||||
|
||||
@ -1065,6 +1065,42 @@ int AR30ToAB30(const uint8_t* src_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AR64 to ARGB.
|
||||
LIBYUV_API
|
||||
int AR64ToARGB(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AB64 to ABGR.
|
||||
#define AB64ToABGR AR64ToARGB
|
||||
|
||||
// Convert AB64 to ARGB.
|
||||
LIBYUV_API
|
||||
int AB64ToARGB(const uint16_t* src_ab64,
|
||||
int src_stride_ab64,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AR64 to ABGR.
|
||||
#define AR64ToABGR AB64ToARGB
|
||||
|
||||
// Convert AR64 To AB64.
|
||||
LIBYUV_API
|
||||
int AR64ToAB64(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint16_t* dst_ab64,
|
||||
int dst_stride_ab64,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AB64 To AR64.
|
||||
#define AB64ToAR64 AR64ToAB64
|
||||
|
||||
// src_width/height provided by capture
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
|
||||
@ -153,6 +153,30 @@ int ARGBToI444(const uint8_t* src_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to AR64.
|
||||
LIBYUV_API
|
||||
int ARGBToAR64(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int dst_stride_ar64,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ABGR to AB64.
|
||||
#define ABGRToAB64 ARGBToAR64
|
||||
|
||||
// Convert ARGB to AB64.
|
||||
LIBYUV_API
|
||||
int ARGBToAB64(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int dst_stride_ab64,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ABGR to AR64.
|
||||
#define ABGRToAR64 ARGBToAB64
|
||||
|
||||
// Convert ARGB To I422.
|
||||
LIBYUV_API
|
||||
int ARGBToI422(const uint8_t* src_argb,
|
||||
|
||||
@ -945,7 +945,7 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb,
|
||||
int width);
|
||||
|
||||
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
|
||||
// shuffler is 16 bytes and must be aligned.
|
||||
// shuffler is 16 bytes.
|
||||
LIBYUV_API
|
||||
int ARGBShuffle(const uint8_t* src_bgra,
|
||||
int src_stride_bgra,
|
||||
@ -955,6 +955,17 @@ int ARGBShuffle(const uint8_t* src_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Shuffle AR64 channel order. e.g. AR64 to AB64.
|
||||
// shuffler is 16 bytes.
|
||||
LIBYUV_API
|
||||
int AR64Shuffle(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint16_t* dst_ar64,
|
||||
int dst_stride_ar64,
|
||||
const uint8_t* shuffler,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Sobel ARGB effect with planar output.
|
||||
LIBYUV_API
|
||||
int ARGBSobelToPlane(const uint8_t* src_argb,
|
||||
|
||||
@ -273,6 +273,10 @@ extern "C" {
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
#define HAS_ABGRTOAR30ROW_SSSE3
|
||||
#define HAS_ARGBTOAR30ROW_SSSE3
|
||||
#define HAS_ARGBTOAR64ROW_SSSE3
|
||||
#define HAS_ARGBTOAB64ROW_SSSE3
|
||||
#define HAS_AR64TOARGBROW_SSSE3
|
||||
#define HAS_AB64TOARGBROW_SSSE3
|
||||
#define HAS_CONVERT16TO8ROW_SSSE3
|
||||
#define HAS_CONVERT8TO16ROW_SSE2
|
||||
#define HAS_HALFMERGEUVROW_SSSE3
|
||||
@ -318,6 +322,10 @@ extern "C" {
|
||||
#define HAS_ARGBTOAR30ROW_AVX2
|
||||
#define HAS_ARGBTORAWROW_AVX2
|
||||
#define HAS_ARGBTORGB24ROW_AVX2
|
||||
#define HAS_ARGBTOAR64ROW_AVX2
|
||||
#define HAS_ARGBTOAB64ROW_AVX2
|
||||
#define HAS_AR64TOARGBROW_AVX2
|
||||
#define HAS_AB64TOARGBROW_AVX2
|
||||
#define HAS_CONVERT16TO8ROW_AVX2
|
||||
#define HAS_CONVERT8TO16ROW_AVX2
|
||||
#define HAS_DIVIDEROW_16_AVX2
|
||||
@ -383,6 +391,10 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB24ROW_NEON
|
||||
#define HAS_ARGBTORGB565DITHERROW_NEON
|
||||
#define HAS_ARGBTORGB565ROW_NEON
|
||||
#define HAS_ARGBTOAR64ROW_NEON
|
||||
#define HAS_ARGBTOAB64ROW_NEON
|
||||
#define HAS_AR64TOARGBROW_NEON
|
||||
#define HAS_AB64TOARGBROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
@ -2563,6 +2575,71 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
|
||||
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
|
||||
|
||||
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
|
||||
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
|
||||
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
|
||||
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
|
||||
void AR64ShuffleRow_C(const uint8_t* src_ar64,
|
||||
uint8_t* dst_ar64,
|
||||
const uint8_t* shuffler,
|
||||
int width);
|
||||
void ARGBToAR64Row_SSSE3(const uint8_t* src_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int width);
|
||||
void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int width);
|
||||
void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void ARGBToAR64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
|
||||
void ARGBToAB64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
|
||||
void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
|
||||
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
|
||||
void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
|
||||
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
|
||||
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
|
||||
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
|
||||
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void ARGBToAB64Row_Any_SSSE3(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void AR64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void AB64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void ARGBToAR64Row_Any_AVX2(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void ARGBToAB64Row_Any_AVX2(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void AR64ToARGBRow_Any_AVX2(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void AB64ToARGBRow_Any_AVX2(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void ARGBToAR64Row_Any_NEON(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void ARGBToAB64Row_Any_NEON(const uint8_t* src_ptr,
|
||||
uint16_t* dst_ptr,
|
||||
int width);
|
||||
void AR64ToARGBRow_Any_NEON(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1780
|
||||
#define LIBYUV_VERSION 1781
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -65,12 +65,14 @@ enum FourCC {
|
||||
// 1 Secondary YUV format: row biplanar. deprecated.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
|
||||
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
|
||||
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
|
||||
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
|
||||
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
|
||||
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
@ -163,6 +165,8 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_RGBA = 32,
|
||||
FOURCC_BPP_AR30 = 32,
|
||||
FOURCC_BPP_AB30 = 32,
|
||||
FOURCC_BPP_AR64 = 64,
|
||||
FOURCC_BPP_AB64 = 64,
|
||||
FOURCC_BPP_24BG = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RGBP = 16,
|
||||
|
||||
@ -2791,6 +2791,10 @@ static const uvec8 kShuffleMaskABGRToARGB = {
|
||||
static const uvec8 kShuffleMaskRGBAToARGB = {
|
||||
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u};
|
||||
|
||||
// Shuffle table for converting AR64 to AB64.
|
||||
static const uvec8 kShuffleMaskAR64ToAB64 = {
|
||||
4u, 5u, 2u, 3u, 0u, 1u, 6u, 7u, 12u, 13u, 10u, 11u, 8u, 9u, 14u, 15u};
|
||||
|
||||
// Convert BGRA to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8_t* src_bgra,
|
||||
@ -2800,7 +2804,7 @@ int BGRAToARGB(const uint8_t* src_bgra,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
|
||||
(const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
|
||||
(const uint8_t*)&kShuffleMaskBGRAToARGB, width, height);
|
||||
}
|
||||
|
||||
// Convert ARGB to BGRA (same as BGRAToARGB).
|
||||
@ -2812,7 +2816,7 @@ int ARGBToBGRA(const uint8_t* src_bgra,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
|
||||
(const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
|
||||
(const uint8_t*)&kShuffleMaskBGRAToARGB, width, height);
|
||||
}
|
||||
|
||||
// Convert ABGR to ARGB.
|
||||
@ -2824,7 +2828,7 @@ int ABGRToARGB(const uint8_t* src_abgr,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
|
||||
(const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
|
||||
(const uint8_t*)&kShuffleMaskABGRToARGB, width, height);
|
||||
}
|
||||
|
||||
// Convert ARGB to ABGR to (same as ABGRToARGB).
|
||||
@ -2836,7 +2840,7 @@ int ARGBToABGR(const uint8_t* src_abgr,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
|
||||
(const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
|
||||
(const uint8_t*)&kShuffleMaskABGRToARGB, width, height);
|
||||
}
|
||||
|
||||
// Convert RGBA to ARGB.
|
||||
@ -2848,7 +2852,19 @@ int RGBAToARGB(const uint8_t* src_rgba,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb,
|
||||
(const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height);
|
||||
(const uint8_t*)&kShuffleMaskRGBAToARGB, width, height);
|
||||
}
|
||||
|
||||
// Convert AR64 To AB64.
|
||||
LIBYUV_API
|
||||
int AR64ToAB64(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint16_t* dst_ab64,
|
||||
int dst_stride_ab64,
|
||||
int width,
|
||||
int height) {
|
||||
return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
|
||||
(const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
|
||||
}
|
||||
|
||||
// Convert RGB24 to ARGB.
|
||||
@ -3357,6 +3373,124 @@ int AR30ToAB30(const uint8_t* src_ar30,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert AR64 to ARGB.
|
||||
LIBYUV_API
|
||||
int AR64ToARGB(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*AR64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb,
|
||||
int width) = AR64ToARGBRow_C;
|
||||
if (!src_ar64 || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
|
||||
src_stride_ar64 = -src_stride_ar64;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_ar64 == width * 4 && dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_ar64 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_AR64TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AR64TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AR64TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
AR64ToARGBRow = AR64ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
AR64ToARGBRow(src_ar64, dst_argb, width);
|
||||
src_ar64 += src_stride_ar64;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert AB64 to ARGB.
|
||||
LIBYUV_API
|
||||
int AB64ToARGB(const uint16_t* src_ab64,
|
||||
int src_stride_ab64,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*AB64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb,
|
||||
int width) = AB64ToARGBRow_C;
|
||||
if (!src_ab64 || !dst_argb || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_ab64 = src_ab64 + (height - 1) * src_stride_ab64;
|
||||
src_stride_ab64 = -src_stride_ab64;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_ab64 == width * 4 && dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_ab64 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_AB64TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AB64TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_AB64TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
AB64ToARGBRow = AB64ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
AB64ToARGBRow(src_ab64, dst_argb, width);
|
||||
src_ab64 += src_stride_ab64;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert NV12 to ARGB with matrix.
|
||||
LIBYUV_API
|
||||
int NV12ToARGBMatrix(const uint8_t* src_y,
|
||||
|
||||
@ -2009,6 +2009,124 @@ int ARGBToJ422(const uint8_t* src_argb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB to AR64.
|
||||
LIBYUV_API
|
||||
int ARGBToAR64(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int dst_stride_ar64,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
|
||||
int width) = ARGBToAR64Row_C;
|
||||
if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_ar64 = 0;
|
||||
}
|
||||
#if defined(HAS_ARGBTOAR64ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAR64ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAR64ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAR64Row = ARGBToAR64Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToAR64Row(src_argb, dst_ar64, width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_ar64 += dst_stride_ar64;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB to AB64.
|
||||
LIBYUV_API
|
||||
int ARGBToAB64(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int dst_stride_ab64,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
|
||||
int width) = ARGBToAB64Row_C;
|
||||
if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_ab64 = 0;
|
||||
}
|
||||
#if defined(HAS_ARGBTOAB64ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAB64ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAB64ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAB64Row = ARGBToAB64Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToAB64Row(src_argb, dst_ab64, width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_ab64 += dst_stride_ab64;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB to J400.
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8_t* src_argb,
|
||||
|
||||
@ -3527,6 +3527,76 @@ int ARGBShuffle(const uint8_t* src_bgra,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Shuffle AR64 channel order. e.g. AR64 to AB64.
|
||||
LIBYUV_API
|
||||
int AR64Shuffle(const uint16_t* src_ar64,
|
||||
int src_stride_ar64,
|
||||
uint16_t* dst_ar64,
|
||||
int dst_stride_ar64,
|
||||
const uint8_t* shuffler,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
|
||||
const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
|
||||
if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
|
||||
src_stride_ar64 = -src_stride_ar64;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_ar64 = dst_stride_ar64 = 0;
|
||||
}
|
||||
// Assembly versions can be reused if it's implemented with shuffle.
|
||||
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBSHUFFLEROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBSHUFFLEROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 2)) {
|
||||
AR64ShuffleRow = ARGBShuffleRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
|
||||
width * 2);
|
||||
src_ar64 += src_stride_ar64;
|
||||
dst_ar64 += dst_stride_ar64;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Gauss blur a float plane using Gaussian 5x5 filter with
|
||||
// coefficients of 1, 4, 6, 4, 1.
|
||||
// Each destination pixel is a blur of the 5x5
|
||||
|
||||
@ -1237,6 +1237,72 @@ ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
|
||||
#undef ANY11P
|
||||
#undef ANY11P
|
||||
|
||||
// Any 1 to 1 with type
|
||||
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
|
||||
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
|
||||
SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
|
||||
memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
|
||||
memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOAR64ROW_SSSE3
|
||||
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAB64ROW_SSSE3
|
||||
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_AR64TOARGBROW_SSSE3
|
||||
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
|
||||
#endif
|
||||
|
||||
// Fix: guard AB64ToARGBRow_Any_SSSE3 with its own HAS_ macro. The original
// used HAS_ARGBTOAR64ROW_SSSE3, which only worked because row.h happens to
// define both together; disabling one kernel independently would break.
#ifdef HAS_AB64TOARGBROW_SSSE3
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAR64ROW_AVX2
|
||||
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAB64ROW_AVX2
|
||||
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_AR64TOARGBROW_AVX2
|
||||
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
|
||||
#endif
|
||||
|
||||
// Fix: guard AB64ToARGBRow_Any_AVX2 with its own HAS_ macro (was the
// unrelated HAS_ARGBTOAR64ROW_AVX2; see row.h which defines
// HAS_AB64TOARGBROW_AVX2 specifically for this kernel).
#ifdef HAS_AB64TOARGBROW_AVX2
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAR64ROW_NEON
|
||||
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAB64ROW_NEON
|
||||
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_AR64TOARGBROW_NEON
|
||||
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
|
||||
#endif
|
||||
|
||||
// Fix: guard AB64ToARGBRow_Any_NEON with its own HAS_ macro (was the
// unrelated HAS_ARGBTOAR64ROW_NEON; row.h defines HAS_AB64TOARGBROW_NEON
// specifically for this kernel).
#ifdef HAS_AB64TOARGBROW_NEON
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif
|
||||
|
||||
#undef ANY11T
|
||||
|
||||
// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
|
||||
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
|
||||
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
|
||||
@ -1403,7 +1469,7 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
|
||||
#undef ANY11C
|
||||
|
||||
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
||||
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
|
||||
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
|
||||
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
|
||||
@ -1420,21 +1486,21 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
|
||||
}
|
||||
|
||||
#ifdef HAS_INTERPOLATEROW_AVX2
|
||||
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
|
||||
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_SSSE3
|
||||
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
|
||||
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_NEON
|
||||
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
|
||||
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_MSA
|
||||
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
|
||||
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_INTERPOLATEROW_MMI
|
||||
ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
|
||||
ANY11I(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
|
||||
#endif
|
||||
#undef ANY11T
|
||||
#undef ANY11I
|
||||
|
||||
// Any 1 to 1 mirror.
|
||||
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
|
||||
|
||||
@ -418,6 +418,82 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
|
||||
}
|
||||
}
|
||||
|
||||
// Widen one row of ARGB (8 bits per channel) to AR64 (16 bits per channel).
// Each byte v is replicated into both halves of the 16 bit lane:
// v * 0x0101 == (v << 8) | v, so 0 maps to 0 and 255 maps to 65535.
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_ar64[c] = (uint16_t)(src_argb[c] * 0x0101);
    }
    src_argb += 4;
    dst_ar64 += 4;
  }
}
|
||||
|
||||
// Convert one row of ARGB (memory order B,G,R,A) to AB64 (memory order
// R,G,B,A, 16 bits per channel): swap lanes 0 and 2 while widening each
// byte by replication (v * 0x0101).
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint16_t b = (uint16_t)(src_argb[0] * 0x0101);
    const uint16_t g = (uint16_t)(src_argb[1] * 0x0101);
    const uint16_t r = (uint16_t)(src_argb[2] * 0x0101);
    const uint16_t a = (uint16_t)(src_argb[3] * 0x0101);
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    src_argb += 4;
    dst_ab64 += 4;
  }
}
|
||||
|
||||
// Narrow one row of AR64 (16 bits per channel) to ARGB (8 bits per
// channel) by keeping the high byte of each lane.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8_t)(src_ar64[c] >> 8);
    }
    src_ar64 += 4;
    dst_argb += 4;
  }
}
|
||||
|
||||
// Convert one row of AB64 (memory order R,G,B,A, 16 bits per channel) to
// ARGB (memory order B,G,R,A): swap lanes 0 and 2 while keeping the high
// byte of each 16 bit lane.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t r = (uint8_t)(src_ab64[0] >> 8);
    const uint8_t g = (uint8_t)(src_ab64[1] >> 8);
    const uint8_t b = (uint8_t)(src_ab64[2] >> 8);
    const uint8_t a = (uint8_t)(src_ab64[3] >> 8);
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    src_ab64 += 4;
    dst_argb += 4;
  }
}
|
||||
|
||||
// TODO(fbarchard): Make shuffle compatible with SIMD versions
// Shuffle the four 16 bit channels of a row of AR64 pixels.
// shuffler is a 16 byte pshufb-style byte mask; only entries 0,2,4,6 are
// consulted and divided by 2 to get the source lane for each of the four
// output lanes. width is in units of 2 bytes (the caller passes
// pixel_width * 2), so each loop iteration consumes one 4-lane pixel.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src16 = (const uint16_t*)src_ar64;
  uint16_t* dst16 = (uint16_t*)dst_ar64;
  // Convert byte offsets in the shuffle mask to 16 bit lane indices.
  const int lane0 = shuffler[0] / 2;
  const int lane1 = shuffler[2] / 2;
  const int lane2 = shuffler[4] / 2;
  const int lane3 = shuffler[6] / 2;
  int x;
  for (x = 0; x < width / 2; ++x) {
    // Read all four source lanes before writing so in-place conversion
    // (src == dst) is supported.
    const uint16_t c0 = src16[lane0];
    const uint16_t c1 = src16[lane1];
    const uint16_t c2 = src16[lane2];
    const uint16_t c3 = src16[lane3];
    dst16[0] = c0;
    dst16[1] = c1;
    dst16[2] = c2;
    dst16[3] = c3;
    src16 += 4;
    dst16 += 4;
  }
}
|
||||
|
||||
#ifdef LIBYUV_RGB7
|
||||
// Old 7 bit math for compatibility on unsupported platforms.
|
||||
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
|
||||
|
||||
@ -1078,6 +1078,226 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||
}
|
||||
#endif
|
||||
|
||||
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
|
||||
10, 9, 8, 11, 14, 13, 12, 15};
|
||||
|
||||
static const uvec8 kShuffleARGBToAB64Lo = {2, 2, 1, 1, 0, 0, 3, 3,
|
||||
6, 6, 5, 5, 4, 4, 7, 7};
|
||||
static const uvec8 kShuffleARGBToAB64Hi = {10, 10, 9, 9, 8, 8, 11, 11,
|
||||
14, 14, 13, 13, 12, 12, 15, 15};
|
||||
|
||||
void ARGBToAR64Row_SSSE3(const uint8_t* src_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"movdqu %%xmm1,0x10(%1) \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ar64), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "xmm0", "xmm1");
|
||||
}
|
||||
|
||||
void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"movdqa %3,%%xmm2 \n"
|
||||
"movdqa %4,%%xmm3 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"pshufb %%xmm2,%%xmm0 \n"
|
||||
"pshufb %%xmm3,%%xmm1 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"movdqu %%xmm1,0x10(%1) \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ab64), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToAB64Lo), // %3
|
||||
"m"(kShuffleARGBToAB64Hi) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
}
|
||||
|
||||
void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu 0x10(%0),%%xmm1 \n"
|
||||
"psrlw $8,%%xmm0 \n"
|
||||
"psrlw $8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "xmm0", "xmm1");
|
||||
}
|
||||
|
||||
void AB64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"movdqa %3,%%xmm2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu 0x10(%0),%%xmm1 \n"
|
||||
"psrlw $8,%%xmm0 \n"
|
||||
"psrlw $8,%%xmm1 \n"
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"pshufb %%xmm2,%%xmm0 \n"
|
||||
"movdqu %%xmm0,(%1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
"sub $0x4,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToABGR) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOAR64ROW_AVX2
|
||||
void ARGBToAR64Row_AVX2(const uint8_t* src_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n"
|
||||
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"vmovdqu %%ymm1,0x20(%1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"lea 0x40(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ar64), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "xmm0", "xmm1");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARGBTOAB64ROW_AVX2
|
||||
void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"vbroadcastf128 %3,%%ymm2 \n"
|
||||
"vbroadcastf128 %4,%%ymm3 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpshufb %%ymm3,%%ymm0,%%ymm1 \n"
|
||||
"vpshufb %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"vmovdqu %%ymm1,0x20(%1) \n"
|
||||
"lea 0x20(%0),%0 \n"
|
||||
"lea 0x40(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ab64), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToAB64Lo), // %3
|
||||
"m"(kShuffleARGBToAB64Hi) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_AR64TOARGBROW_AVX2
|
||||
void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||
"vpsrlw $8,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $8,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "memory", "cc", "xmm0", "xmm1");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_AB64TOARGBROW_AVX2
|
||||
void AB64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
|
||||
"vbroadcastf128 %3,%%ymm2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu (%0),%%ymm0 \n"
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||
"vpsrlw $8,%%ymm0,%%ymm0 \n"
|
||||
"vpsrlw $8,%%ymm1,%%ymm1 \n"
|
||||
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
||||
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||
"vpshufb %%ymm2,%%ymm0,%%ymm0 \n"
|
||||
"vmovdqu %%ymm0,(%1) \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
"lea 0x20(%1),%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToABGR) // %3
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2");
|
||||
}
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
|
||||
// TODO(mraptis): Consider passing R, G, B multipliers as parameter.
|
||||
|
||||
@ -2119,6 +2119,105 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
|
||||
}
|
||||
|
||||
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
|
||||
10, 9, 8, 11, 14, 13, 12, 15};
|
||||
|
||||
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n"
|
||||
"vld1.8 {q2}, [%0]! \n"
|
||||
"vmov.u8 q1, q0 \n"
|
||||
"vmov.u8 q3, q2 \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
|
||||
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ar64), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vld1.8 q4, %3 \n" // shuffler
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n"
|
||||
"vld1.8 {q2}, [%0]! \n"
|
||||
"vtbl.8 d2, {d0, d1}, d8 \n"
|
||||
"vtbl.8 d3, {d0, d1}, d9 \n"
|
||||
"vtbl.8 d6, {d4, d5}, d8 \n"
|
||||
"vtbl.8 d7, {d4, d5}, d9 \n"
|
||||
"vmov.u8 q0, q1 \n"
|
||||
"vmov.u8 q2, q3 \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
|
||||
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ab64), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToABGR) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
|
||||
}
|
||||
|
||||
void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"vld1.16 {q0}, [%0]! \n"
|
||||
"vld1.16 {q1}, [%0]! \n"
|
||||
"vld1.16 {q2}, [%0]! \n"
|
||||
"vld1.16 {q3}, [%0]! \n"
|
||||
"vshrn.u16 d0, q0, #8 \n"
|
||||
"vshrn.u16 d1, q1, #8 \n"
|
||||
"vshrn.u16 d4, q2, #8 \n"
|
||||
"vshrn.u16 d5, q3, #8 \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
|
||||
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
|
||||
|
||||
void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vld1.8 d8, %3 \n" // shuffler
|
||||
"1: \n"
|
||||
"vld1.16 {q0}, [%0]! \n"
|
||||
"vld1.16 {q1}, [%0]! \n"
|
||||
"vld1.16 {q2}, [%0]! \n"
|
||||
"vld1.16 {q3}, [%0]! \n"
|
||||
"vtbl.8 d0, {d0, d1}, d8 \n"
|
||||
"vtbl.8 d1, {d2, d3}, d8 \n"
|
||||
"vtbl.8 d4, {d4, d5}, d8 \n"
|
||||
"vtbl.8 d5, {d6, d7}, d8 \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
|
||||
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ab64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleAB64ToARGB) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
|
||||
}
|
||||
|
||||
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||
asm volatile(
|
||||
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient
|
||||
|
||||
@ -1565,6 +1565,100 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
|
||||
: "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23");
|
||||
}
|
||||
|
||||
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
|
||||
10, 9, 8, 11, 14, 13, 12, 15};
|
||||
|
||||
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
|
||||
uint16_t* dst_ar64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ldp q0, q2, [%0], #32 \n" // load 8 pixels
|
||||
"mov v1.16b, v0.16b \n"
|
||||
"mov v3.16b, v2.16b \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||
"st2 {v0.16b, v1.16b}, [%1], #32 \n" // store 4 pixels
|
||||
"st2 {v2.16b, v3.16b}, [%1], #32 \n" // store 4 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ar64), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3");
|
||||
}
|
||||
|
||||
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
|
||||
uint16_t* dst_ab64,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"ld1 {v4.16b}, %3 \n" // shuffler
|
||||
"1: \n"
|
||||
"ldp q0, q2, [%0], #32 \n" // load 8 pixels
|
||||
"tbl v0.16b, {v0.16b}, v4.16b \n"
|
||||
"tbl v2.16b, {v2.16b}, v4.16b \n"
|
||||
"mov v1.16b, v0.16b \n"
|
||||
"mov v3.16b, v2.16b \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||
"st2 {v0.16b, v1.16b}, [%1], #32 \n" // store 4 pixels
|
||||
"st2 {v2.16b, v3.16b}, [%1], #32 \n" // store 4 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_ab64), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleARGBToABGR) // %3
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
|
||||
}
|
||||
|
||||
static const uvec8 kShuffleAR64ToARGB = {1, 3, 5, 7, 9, 11, 13, 15,
|
||||
17, 19, 21, 23, 25, 27, 29, 31};
|
||||
|
||||
void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"ld1 {v4.16b}, %3 \n" // shuffler
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%0], #32 \n" // load 4 pixels
|
||||
"ldp q2, q3, [%0], #32 \n" // load 4 pixels
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"tbl v0.16b, {v0.16b, v1.16b}, v4.16b \n"
|
||||
"tbl v2.16b, {v2.16b, v3.16b}, v4.16b \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||
"stp q0, q2, [%1], #32 \n" // store 8 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_ar64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleAR64ToARGB) // %3
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
|
||||
}
|
||||
|
||||
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15,
|
||||
21, 19, 17, 23, 29, 27, 25, 31};
|
||||
|
||||
void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"ld1 {v4.16b}, %3 \n" // shuffler
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%0], #32 \n" // load 4 pixels
|
||||
"ldp q2, q3, [%0], #32 \n" // load 4 pixels
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"tbl v0.16b, {v0.16b, v1.16b}, v4.16b \n"
|
||||
"tbl v2.16b, {v2.16b, v3.16b}, v4.16b \n"
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop.
|
||||
"stp q0, q2, [%1], #32 \n" // store 8 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_ab64), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kShuffleAB64ToARGB) // %3
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
|
||||
}
|
||||
|
||||
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
asm volatile(
|
||||
"movi v4.8b, #25 \n" // B * 0.1016 coefficient
|
||||
@ -3595,8 +3689,7 @@ void MultiplyRow_16_NEON(const uint16_t* src_y,
|
||||
asm volatile(
|
||||
"dup v2.8h, %w2 \n"
|
||||
"1: \n"
|
||||
"ldp q0, q1, [%0] \n"
|
||||
"add %0, %0, #32 \n"
|
||||
"ldp q0, q1, [%0], #32 \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"mul v0.8h, v0.8h, v2.8h \n"
|
||||
"mul v1.8h, v1.8h, v2.8h \n"
|
||||
@ -3619,8 +3712,7 @@ void DivideRow_16_NEON(const uint16_t* src_y,
|
||||
asm volatile(
|
||||
"dup v0.8h, %w2 \n"
|
||||
"1: \n"
|
||||
"ldp q1, q2, [%0] \n"
|
||||
"add %0, %0, #32 \n"
|
||||
"ldp q1, q2, [%0], #32 \n"
|
||||
"prfm pldl1keep, [%0, 448] \n"
|
||||
"ushll v3.4s, v1.4h, #0 \n"
|
||||
"ushll v4.4s, v2.4h, #0 \n"
|
||||
|
||||
@ -1214,147 +1214,159 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
|
||||
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
|
||||
|
||||
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
||||
const int kStrideA = \
|
||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
const int kStrideB = \
|
||||
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
|
||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||
} \
|
||||
memset(dst_argb_c, 1, kStrideB* kHeightB); \
|
||||
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, kWidth, \
|
||||
NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_opt, kStrideB, \
|
||||
kWidth, NEG kHeight); \
|
||||
} \
|
||||
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
|
||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||
} \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
free_aligned_buffer_page_end(dst_argb_c); \
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
|
||||
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
||||
const int kStrideA = \
|
||||
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
const int kStrideB = \
|
||||
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||
align_buffer_page_end(src_argb, \
|
||||
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB * sizeof(TYPE_B)); \
|
||||
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
|
||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||
} \
|
||||
memset(dst_argb_c, 1, kStrideB* kHeightB); \
|
||||
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
|
||||
kStrideB, kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \
|
||||
(TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
|
||||
} \
|
||||
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
|
||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||
} \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
free_aligned_buffer_page_end(dst_argb_c); \
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
}
|
||||
|
||||
#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \
|
||||
STRIDE_B, HEIGHT_B) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
|
||||
for (int times = 0; times < benchmark_iterations_; ++times) { \
|
||||
const int kWidth = (fastrand() & 63) + 1; \
|
||||
const int kHeight = (fastrand() & 31) + 1; \
|
||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
||||
const int kStrideA = \
|
||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
const int kStrideB = \
|
||||
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||
align_buffer_page_end(src_argb, kStrideA* kHeightA); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
|
||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||
src_argb[i] = (fastrand() & 0xff); \
|
||||
} \
|
||||
memset(dst_argb_c, 123, kStrideB* kHeightB); \
|
||||
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_c, kStrideB, kWidth, \
|
||||
kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_opt, kStrideB, kWidth, \
|
||||
kHeight); \
|
||||
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
|
||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||
} \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
free_aligned_buffer_page_end(dst_argb_c); \
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
} \
|
||||
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
|
||||
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
|
||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
|
||||
for (int times = 0; times < benchmark_iterations_; ++times) { \
|
||||
const int kWidth = (fastrand() & 63) + 1; \
|
||||
const int kHeight = (fastrand() & 31) + 1; \
|
||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
||||
const int kStrideA = \
|
||||
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
const int kStrideB = \
|
||||
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||
align_buffer_page_end(src_argb, kStrideA* kHeightA * sizeof(TYPE_A)); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
|
||||
align_buffer_page_end(dst_argb_opt, \
|
||||
kStrideB* kHeightB * sizeof(TYPE_B)); \
|
||||
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
|
||||
src_argb[i] = 0xfe; \
|
||||
} \
|
||||
memset(dst_argb_c, 123, kStrideB* kHeightB); \
|
||||
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
|
||||
kStrideB, kWidth, kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
|
||||
kStrideB, kWidth, kHeight); \
|
||||
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
|
||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||
} \
|
||||
free_aligned_buffer_page_end(src_argb); \
|
||||
free_aligned_buffer_page_end(dst_argb_c); \
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B) \
|
||||
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, benchmark_width_ - 4, _Any, +, 0) \
|
||||
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, benchmark_width_, _Unaligned, +, 1) \
|
||||
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, benchmark_width_, _Invert, -, 0) \
|
||||
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, benchmark_width_, _Opt, +, 0) \
|
||||
TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B)
|
||||
#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
|
||||
EPP_B, STRIDE_B, HEIGHT_B) \
|
||||
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
|
||||
STRIDE_B, HEIGHT_B, benchmark_width_ - 4, _Any, +, 0) \
|
||||
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
|
||||
STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 1) \
|
||||
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
|
||||
STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \
|
||||
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
|
||||
STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \
|
||||
TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
|
||||
EPP_B, STRIDE_B, HEIGHT_B)
|
||||
|
||||
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1)
|
||||
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
|
||||
#endif
|
||||
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1)
|
||||
TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
|
||||
#endif
|
||||
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1)
|
||||
TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1)
|
||||
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1)
|
||||
TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1)
|
||||
TESTATOB(ABGR, 4, 4, 1, RAW, 3, 3, 1)
|
||||
TESTATOB(ABGR, 4, 4, 1, RGB24, 3, 3, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1)
|
||||
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1) // 4
|
||||
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1)
|
||||
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1)
|
||||
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1)
|
||||
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1)
|
||||
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1)
|
||||
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1)
|
||||
TESTATOB(RGB24, 3, 3, 1, RGB24Mirror, 3, 3, 1)
|
||||
TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
|
||||
TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
|
||||
TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
|
||||
TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
|
||||
TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
|
||||
TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
|
||||
TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
|
||||
#ifdef LITTLE_ENDIAN_ONLY_TEST
|
||||
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
#endif
|
||||
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1)
|
||||
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1)
|
||||
TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
||||
TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
||||
TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
|
||||
TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
|
||||
TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
|
||||
TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
||||
|
||||
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, W1280, N, NEG, OFF) \
|
||||
@ -1443,35 +1455,38 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1)
|
||||
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
|
||||
#endif
|
||||
|
||||
#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, OFF) \
|
||||
#define TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \
|
||||
OFF) \
|
||||
TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \
|
||||
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
|
||||
const int kHeight = benchmark_height_; \
|
||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||
const int kStrideA = \
|
||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA); \
|
||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||
align_buffer_page_end(src_argb, \
|
||||
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
|
||||
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA * sizeof(TYPE_A)); \
|
||||
align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA * sizeof(TYPE_A)); \
|
||||
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
|
||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||
} \
|
||||
memset(dst_argb_c, 1, kStrideA* kHeightA); \
|
||||
memset(dst_argb_opt, 101, kStrideA* kHeightA); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_c, kStrideA, kWidth, \
|
||||
NEG kHeight); \
|
||||
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
|
||||
kStrideA, kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_opt, kStrideA, kWidth, \
|
||||
NEG kHeight); \
|
||||
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \
|
||||
kStrideA, kWidth, NEG kHeight); \
|
||||
} \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
FMT_ATOB(dst_argb_c, kStrideA, dst_argb_c, kStrideA, kWidth, NEG kHeight); \
|
||||
FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \
|
||||
kWidth, NEG kHeight); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
FMT_ATOB(dst_argb_opt, kStrideA, dst_argb_opt, kStrideA, kWidth, \
|
||||
NEG kHeight); \
|
||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||
FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
|
||||
kWidth, NEG kHeight); \
|
||||
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
|
||||
EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
|
||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||
} \
|
||||
@ -1480,18 +1495,20 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
|
||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||
}
|
||||
|
||||
#define TESTSYM(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A) \
|
||||
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ - 4, _Any, +, \
|
||||
0) \
|
||||
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Unaligned, \
|
||||
+, 1) \
|
||||
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Opt, +, 0)
|
||||
#define TESTSYM(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \
|
||||
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ - 4, \
|
||||
_Any, +, 0) \
|
||||
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
|
||||
_Unaligned, +, 1) \
|
||||
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
|
||||
_Opt, +, 0)
|
||||
|
||||
TESTSYM(ARGBToARGB, 4, 4, 1)
|
||||
TESTSYM(ARGBToBGRA, 4, 4, 1)
|
||||
TESTSYM(ARGBToABGR, 4, 4, 1)
|
||||
TESTSYM(BGRAToARGB, 4, 4, 1)
|
||||
TESTSYM(ABGRToARGB, 4, 4, 1)
|
||||
TESTSYM(ARGBToARGB, uint8_t, 4, 4, 1)
|
||||
TESTSYM(ARGBToBGRA, uint8_t, 4, 4, 1)
|
||||
TESTSYM(ARGBToABGR, uint8_t, 4, 4, 1)
|
||||
TESTSYM(BGRAToARGB, uint8_t, 4, 4, 1)
|
||||
TESTSYM(ABGRToARGB, uint8_t, 4, 4, 1)
|
||||
TESTSYM(AB64ToAR64, uint16_t, 4, 4, 1)
|
||||
|
||||
TEST_F(LibYUVConvertTest, Test565) {
|
||||
SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
|
||||
|
||||
@ -29,7 +29,7 @@ static bool TestValidFourCC(uint32_t fourcc, int bpp) {
|
||||
!TestValidChar((fourcc >> 24) & 0xff)) {
|
||||
return false;
|
||||
}
|
||||
if (bpp < 0 || bpp > 32) {
|
||||
if (bpp < 0 || bpp > 64) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -72,6 +72,8 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user