Add support for AR64 format

Add the following conversions:
ARGB,ABGR <-> AR64,AB64
AR64 <-> AB64

R=fbarchard@chromium.org

Change-Id: I5ca5b40a98bffea11981e136afae4a511ba6c564
Bug: libyuv:886
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2746780
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2021-03-13 17:17:02 +08:00 committed by Frank Barchard
parent 19bbedfd3e
commit f37014fcff
18 changed files with 1227 additions and 171 deletions
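For orientation, a minimal sketch of how the new entry points compose (a hypothetical round trip; buffer names and dimensions are illustrative, and strides count elements of each buffer's own type, per the coalesce checks in convert_argb.cc below):

#include <stdint.h>
#include "libyuv.h"  // ARGBToAR64, AR64ToAB64, AB64ToARGB, ...

enum { kW = 64, kH = 2 };
static uint8_t argb[kW * kH * 4];   // 8 bits per channel
static uint16_t ar64[kW * kH * 4];  // 16 bits per channel
static uint16_t ab64[kW * kH * 4];

void RoundTrip(void) {
  ARGBToAR64(argb, kW * 4, ar64, kW * 4, kW, kH);  // widen 8 -> 16 bit
  AR64ToAB64(ar64, kW * 4, ab64, kW * 4, kW, kH);  // swap R and B channels
  AB64ToARGB(ab64, kW * 4, argb, kW * 4, kW, kH);  // narrow back, undoing the swap
}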

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1780
Version: 1781
License: BSD
License File: LICENSE

View File

@ -54,12 +54,14 @@ The following is extracted from video_common.h as a complete list of formats sup
// 1 Secondary YUV format: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'), // deprecated.
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@ -180,6 +182,14 @@ The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
The 10 bit RGB values range from 0 to 1023.
XR30 is the same as AR30 but with no alpha channel.
# AB64 and AR64
AB64 is similar to ABGR, with 16 bits (2 bytes) per channel. Each channel is stored as an unsigned short.
In memory, R is the lowest and A is the highest.
Each channel has a value range from 0 to 65535.
AR64 is similar to ARGB.
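A little-endian sketch of one pixel in each layout (channel values are illustrative; AR64 keeps the B-first channel order of libyuv's ARGB, which matches the straight-through copy in ARGBToAR64Row_C):

uint16_t r = 65535, g = 0, b = 0, a = 65535;  // opaque red
uint16_t ab64_pixel[4] = {r, g, b, a};  // AB64: R at the lowest address, A at the highest
uint16_t ar64_pixel[4] = {b, g, r, a};  // AR64: channel order of ARGB, widened to 16 bits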
# NV12 and NV21
NV12 is a biplanar format with a full sized Y plane followed by a single

View File

@ -1065,6 +1065,42 @@ int AR30ToAB30(const uint8_t* src_ar30,
int width,
int height);
// Convert AR64 to ARGB.
LIBYUV_API
int AR64ToARGB(const uint16_t* src_ar64,
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AB64 to ABGR.
#define AB64ToABGR AR64ToARGB
// Convert AB64 to ARGB.
LIBYUV_API
int AB64ToARGB(const uint16_t* src_ab64,
int src_stride_ab64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert AR64 to ABGR.
#define AR64ToABGR AB64ToARGB
// Convert AR64 To AB64.
LIBYUV_API
int AR64ToAB64(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert AB64 To AR64.
#define AB64ToAR64 AR64ToAB64
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API

View File

@ -153,6 +153,30 @@ int ARGBToI444(const uint8_t* src_argb,
int width,
int height);
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height);
// Convert ABGR to AB64.
#define ABGRToAB64 ARGBToAR64
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height);
// Convert ABGR to AR64.
#define ABGRToAR64 ARGBToAB64
// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8_t* src_argb,

View File

@ -945,7 +945,7 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb,
int width);
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// shuffler is 16 bytes and must be aligned.
// shuffler is 16 bytes.
LIBYUV_API
int ARGBShuffle(const uint8_t* src_bgra,
int src_stride_bgra,
@ -955,6 +955,17 @@ int ARGBShuffle(const uint8_t* src_bgra,
int width,
int height);
// Shuffle AR64 channel order. e.g. AR64 to AB64.
// shuffler is 16 bytes.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ar64,
int dst_stride_ar64,
const uint8_t* shuffler,
int width,
int height);
// Sobel ARGB effect with planar output.
LIBYUV_API
int ARGBSobelToPlane(const uint8_t* src_argb,

View File

@ -273,6 +273,10 @@ extern "C" {
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3
#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_AR64TOARGBROW_SSSE3
#define HAS_AB64TOARGBROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
@ -318,6 +322,10 @@ extern "C" {
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_AR64TOARGBROW_AVX2
#define HAS_AB64TOARGBROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
#define HAS_DIVIDEROW_16_AVX2
@ -383,6 +391,10 @@ extern "C" {
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565DITHERROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOAR64ROW_NEON
#define HAS_ARGBTOAB64ROW_NEON
#define HAS_AR64TOARGBROW_NEON
#define HAS_AB64TOARGBROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
@ -2563,6 +2575,71 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
const uint8_t* shuffler,
int width);
void ARGBToAR64Row_SSSE3(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width);
void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width);
void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
uint8_t* dst_argb,
int width);
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width);
void ARGBToAR64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void ARGBToAB64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void AR64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void AB64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToAR64Row_Any_AVX2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void ARGBToAB64Row_Any_AVX2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void AR64ToARGBRow_Any_AVX2(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void AB64ToARGBRow_Any_AVX2(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBToAR64Row_Any_NEON(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void ARGBToAB64Row_Any_NEON(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
void AR64ToARGBRow_Any_NEON(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int width);
void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1780
#define LIBYUV_VERSION 1781
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -65,12 +65,14 @@ enum FourCC {
// 1 Secondary YUV format: row biplanar. deprecated.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
// 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc, 2 64 bpp
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel.
FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
@ -163,6 +165,8 @@ enum FourCCBpp {
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_AR30 = 32,
FOURCC_BPP_AB30 = 32,
FOURCC_BPP_AR64 = 64,
FOURCC_BPP_AB64 = 64,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,

View File

@ -2791,6 +2791,10 @@ static const uvec8 kShuffleMaskABGRToARGB = {
static const uvec8 kShuffleMaskRGBAToARGB = {
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u};
// Shuffle table for converting AR64 to AB64.
static const uvec8 kShuffleMaskAR64ToAB64 = {
4u, 5u, 2u, 3u, 0u, 1u, 6u, 7u, 12u, 13u, 10u, 11u, 8u, 9u, 14u, 15u};
// Convert BGRA to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8_t* src_bgra,
@ -2800,7 +2804,7 @@ int BGRAToARGB(const uint8_t* src_bgra,
int width,
int height) {
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
(const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
(const uint8_t*)&kShuffleMaskBGRAToARGB, width, height);
}
// Convert ARGB to BGRA (same as BGRAToARGB).
@ -2812,7 +2816,7 @@ int ARGBToBGRA(const uint8_t* src_bgra,
int width,
int height) {
return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
(const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height);
(const uint8_t*)&kShuffleMaskBGRAToARGB, width, height);
}
// Convert ABGR to ARGB.
@ -2824,7 +2828,7 @@ int ABGRToARGB(const uint8_t* src_abgr,
int width,
int height) {
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
(const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
(const uint8_t*)&kShuffleMaskABGRToARGB, width, height);
}
// Convert ARGB to ABGR (same as ABGRToARGB).
@ -2836,7 +2840,7 @@ int ARGBToABGR(const uint8_t* src_abgr,
int width,
int height) {
return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb,
(const uint8_t*)(&kShuffleMaskABGRToARGB), width, height);
(const uint8_t*)&kShuffleMaskABGRToARGB, width, height);
}
// Convert RGBA to ARGB.
@ -2848,7 +2852,19 @@ int RGBAToARGB(const uint8_t* src_rgba,
int width,
int height) {
return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb,
(const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height);
(const uint8_t*)&kShuffleMaskRGBAToARGB, width, height);
}
// Convert AR64 To AB64.
LIBYUV_API
int AR64ToAB64(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height) {
return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
(const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
}
// Convert RGB24 to ARGB.
@ -3357,6 +3373,124 @@ int AR30ToAB30(const uint8_t* src_ar30,
return 0;
}
// Convert AR64 to ARGB.
LIBYUV_API
int AR64ToARGB(const uint16_t* src_ar64,
int src_stride_ar64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
void (*AR64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb,
int width) = AR64ToARGBRow_C;
if (!src_ar64 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
src_stride_ar64 = -src_stride_ar64;
}
// Coalesce rows.
if (src_stride_ar64 == width * 4 && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_ar64 = dst_stride_argb = 0;
}
#if defined(HAS_AR64TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
AR64ToARGBRow = AR64ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
AR64ToARGBRow = AR64ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_AR64TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
AR64ToARGBRow = AR64ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
AR64ToARGBRow = AR64ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_AR64TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
AR64ToARGBRow = AR64ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
AR64ToARGBRow = AR64ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
AR64ToARGBRow(src_ar64, dst_argb, width);
src_ar64 += src_stride_ar64;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert AB64 to ARGB.
LIBYUV_API
int AB64ToARGB(const uint16_t* src_ab64,
int src_stride_ab64,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
void (*AB64ToARGBRow)(const uint16_t* src_ab64, uint8_t* dst_argb,
int width) = AB64ToARGBRow_C;
if (!src_ab64 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_ab64 = src_ab64 + (height - 1) * src_stride_ab64;
src_stride_ab64 = -src_stride_ab64;
}
// Coalesce rows.
if (src_stride_ab64 == width * 4 && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_ab64 = dst_stride_argb = 0;
}
#if defined(HAS_AB64TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
AB64ToARGBRow = AB64ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
AB64ToARGBRow = AB64ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_AB64TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
AB64ToARGBRow = AB64ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
AB64ToARGBRow = AB64ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_AB64TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
AB64ToARGBRow = AB64ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
AB64ToARGBRow = AB64ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
AB64ToARGBRow(src_ab64, dst_argb, width);
src_ab64 += src_stride_ab64;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert NV12 to ARGB with matrix.
LIBYUV_API
int NV12ToARGBMatrix(const uint8_t* src_y,

View File

@ -2009,6 +2009,124 @@ int ARGBToJ422(const uint8_t* src_argb,
return 0;
}
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height) {
int y;
void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAR64Row_C;
if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ar64 = 0;
}
#if defined(HAS_ARGBTOAR64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAR64Row = ARGBToAR64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAR64Row(src_argb, dst_ar64, width);
src_argb += src_stride_argb;
dst_ar64 += dst_stride_ar64;
}
return 0;
}
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height) {
int y;
void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ab64,
int width) = ARGBToAB64Row_C;
if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ab64 = 0;
}
#if defined(HAS_ARGBTOAB64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAB64Row = ARGBToAB64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAB64Row(src_argb, dst_ab64, width);
src_argb += src_stride_argb;
dst_ab64 += dst_stride_ab64;
}
return 0;
}
// Convert ARGB to J400.
LIBYUV_API
int ARGBToJ400(const uint8_t* src_argb,

View File

@ -3527,6 +3527,76 @@ int ARGBShuffle(const uint8_t* src_bgra,
return 0;
}
// Shuffle AR64 channel order. e.g. AR64 to AB64.
LIBYUV_API
int AR64Shuffle(const uint16_t* src_ar64,
int src_stride_ar64,
uint16_t* dst_ar64,
int dst_stride_ar64,
const uint8_t* shuffler,
int width,
int height) {
int y;
void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
src_stride_ar64 = -src_stride_ar64;
}
// Coalesce rows.
if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_ar64 = dst_stride_ar64 = 0;
}
// The ARGBShuffle assembly versions can be reused because they are plain byte shuffles.
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
AR64ShuffleRow = ARGBShuffleRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
AR64ShuffleRow = ARGBShuffleRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
AR64ShuffleRow = ARGBShuffleRow_NEON;
}
}
#endif
#if defined(HAS_ARGBSHUFFLEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
AR64ShuffleRow = ARGBShuffleRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
AR64ShuffleRow = ARGBShuffleRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
width * 2);
src_ar64 += src_stride_ar64;
dst_ar64 += dst_stride_ar64;
}
return 0;
}
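The width * 2 in the row call above is a unit conversion: the byte-shuffle kernels work in 4-byte units while an AR64 pixel occupies 8 bytes, so

  width pixels * 8 bytes/pixel = (width * 2) units * 4 bytes/unit.

The 16-byte shuffler remains valid because its entries move whole bytes, and the byte pairs of each uint16_t channel travel together.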
// Gauss blur a float plane using Gaussian 5x5 filter with
// coefficients of 1, 4, 6, 4, 1.
// Each destination pixel is a blur of the 5x5

View File

@ -1237,6 +1237,72 @@ ANY11P(ARGBShuffleRow_Any_MMI, ARGBShuffleRow_MMI, const uint8_t*, 4, 4, 1)
#undef ANY11P
#undef ANY11P
// Any 1 to 1 with type
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[(MASK + 1) * SBPP]); \
SIMD_ALIGNED(uint8_t out[(MASK + 1) * BPP]); \
memset(temp, 0, (MASK + 1) * SBPP); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
memcpy(temp, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \
ANY_SIMD((STYPE*)temp, (DTYPE*)out, MASK + 1); \
memcpy((uint8_t*)(dst_ptr) + n * BPP, out, r * BPP); \
}
#ifdef HAS_ARGBTOAR64ROW_SSSE3
ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif
#ifdef HAS_ARGBTOAB64ROW_SSSE3
ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3)
#endif
#ifdef HAS_AR64TOARGBROW_SSSE3
ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif
#ifdef HAS_AB64TOARGBROW_SSSE3
ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3)
#endif
#ifdef HAS_ARGBTOAR64ROW_AVX2
ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_ARGBTOAB64ROW_AVX2
ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_AR64TOARGBROW_AVX2
ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_AB64TOARGBROW_AVX2
ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_ARGBTOAR64ROW_NEON
ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_ARGBTOAB64ROW_NEON
ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7)
#endif
#ifdef HAS_AR64TOARGBROW_NEON
ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif
#ifdef HAS_AB64TOARGBROW_NEON
ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7)
#endif
#undef ANY11T
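For reference, a hand expansion of one instantiation, ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3): the SIMD kernel covers the multiple-of-4 prefix, and the last 1-3 pixels round-trip through zeroed, aligned temporaries so the kernel never touches memory past the row.

void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) {
  SIMD_ALIGNED(uint8_t temp[4 * 4]);  // (MASK + 1) * SBPP source bytes
  SIMD_ALIGNED(uint8_t out[4 * 8]);   // (MASK + 1) * BPP destination bytes
  memset(temp, 0, 4 * 4);             // for msan
  int r = width & 3;                  // leftover pixels
  int n = width & ~3;                 // SIMD-aligned pixel count
  if (n > 0) {
    ARGBToAR64Row_SSSE3(src_ptr, dst_ptr, n);
  }
  memcpy(temp, (uint8_t*)(src_ptr) + n * 4, r * 4);
  ARGBToAR64Row_SSSE3((uint8_t*)temp, (uint16_t*)out, 4);
  memcpy((uint8_t*)(dst_ptr) + n * 8, out, r * 8);
}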
// Any 1 to 1 with parameter and shorts. BPP is measured in shorts.
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \
@ -1403,7 +1469,7 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
#define ANY11I(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
@ -1420,21 +1486,21 @@ ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
}
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MSA
ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_MMI
ANY11T(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
ANY11I(InterpolateRow_Any_MMI, InterpolateRow_MMI, 1, 1, 7)
#endif
#undef ANY11T
#undef ANY11I
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \

View File

@ -418,6 +418,82 @@ void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
}
}
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_ar64[0] = src_argb[0] * 0x0101;
dst_ar64[1] = src_argb[1] * 0x0101;
dst_ar64[2] = src_argb[2] * 0x0101;
dst_ar64[3] = src_argb[3] * 0x0101;
dst_ar64 += 4;
src_argb += 4;
}
}
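The * 0x0101 above is the exact 8-to-16-bit range expansion: it copies the byte into both halves of the result, mapping 0x00 to 0x0000 and 0xFF to 0xFFFF. A standalone check of the identity:

#include <assert.h>
#include <stdint.h>
int main(void) {
  int v;
  for (v = 0; v <= 255; ++v) {
    // v * 0x0101 duplicates the byte: the same as (v << 8) | v.
    assert((uint16_t)(v * 0x0101) == (uint16_t)((v << 8) | v));
  }
  return 0;
}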
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_ab64[0] = src_argb[2] * 0x0101;
dst_ab64[1] = src_argb[1] * 0x0101;
dst_ab64[2] = src_argb[0] * 0x0101;
dst_ab64[3] = src_argb[3] * 0x0101;
dst_ab64 += 4;
src_argb += 4;
}
}
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_argb[0] = src_ar64[0] >> 8;
dst_argb[1] = src_ar64[1] >> 8;
dst_argb[2] = src_ar64[2] >> 8;
dst_argb[3] = src_ar64[3] >> 8;
dst_argb += 4;
src_ar64 += 4;
}
}
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
int x;
for (x = 0; x < width; ++x) {
dst_argb[0] = src_ab64[2] >> 8;
dst_argb[1] = src_ab64[1] >> 8;
dst_argb[2] = src_ab64[0] >> 8;
dst_argb[3] = src_ab64[3] >> 8;
dst_argb += 4;
src_ab64 += 4;
}
}
// TODO(fbarchard): Make shuffle compatible with SIMD versions
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
const uint8_t* shuffler,
int width) {
const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
int index0 = shuffler[0] / 2;
int index1 = shuffler[2] / 2;
int index2 = shuffler[4] / 2;
int index3 = shuffler[6] / 2;
// Shuffle a row of AR64.
int x;
for (x = 0; x < width / 2; ++x) {
// To support in-place conversion.
uint16_t b = src_ar64_16[index0];
uint16_t g = src_ar64_16[index1];
uint16_t r = src_ar64_16[index2];
uint16_t a = src_ar64_16[index3];
dst_ar64_16[0] = b;
dst_ar64_16[1] = g;
dst_ar64_16[2] = r;
dst_ar64_16[3] = a;
src_ar64_16 += 4;
dst_ar64_16 += 4;
}
}
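The shuffler here is the same pshufb-style byte mask the SIMD paths consume; the C version halves the even entries to recover uint16_t element indices. A sketch of that derivation (DeriveIndices is a hypothetical helper, not part of the API):

#include <stdint.h>
static void DeriveIndices(const uint8_t shuffler[8], int indices[4]) {
  int c;
  for (c = 0; c < 4; ++c) {
    indices[c] = shuffler[c * 2] / 2;  // byte pairs move together
  }
}
// For kShuffleMaskAR64ToAB64 = {4,5, 2,3, 0,1, 6,7, ...} this yields
// {2, 1, 0, 3}: channels 0 and 2 trade places, i.e. an R<->B swap.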
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {

View File

@ -1078,6 +1078,226 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
}
#endif
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
10, 9, 8, 11, 14, 13, 12, 15};
static const uvec8 kShuffleARGBToAB64Lo = {2, 2, 1, 1, 0, 0, 3, 3,
6, 6, 5, 5, 4, 4, 7, 7};
static const uvec8 kShuffleARGBToAB64Hi = {10, 10, 9, 9, 8, 8, 11, 11,
14, 14, 13, 13, 12, 12, 15, 15};
void ARGBToAR64Row_SSSE3(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
asm volatile(
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm1 \n"
"movdqu %%xmm0,(%1) \n"
"movdqu %%xmm1,0x10(%1) \n"
"lea 0x10(%0),%0 \n"
"lea 0x20(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
:
: "memory", "cc", "xmm0", "xmm1");
}
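A rough SSE2-intrinsics rendering of one loop iteration above (4 pixels), offered as a sketch rather than the shipped code: unpacking a register with itself duplicates every byte, the same widening the C path does with * 0x0101.

#include <emmintrin.h>
#include <stdint.h>
static void Widen4Pixels(const uint8_t* src, uint16_t* dst) {
  __m128i v = _mm_loadu_si128((const __m128i*)src);               // 4 ARGB pixels
  _mm_storeu_si128((__m128i*)dst, _mm_unpacklo_epi8(v, v));       // pixels 0-1
  _mm_storeu_si128((__m128i*)(dst + 8), _mm_unpackhi_epi8(v, v)); // pixels 2-3
}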
void ARGBToAB64Row_SSSE3(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
"movdqa %3,%%xmm2 \n"
"movdqa %4,%%xmm3 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"pshufb %%xmm2,%%xmm0 \n"
"pshufb %%xmm3,%%xmm1 \n"
"movdqu %%xmm0,(%1) \n"
"movdqu %%xmm1,0x10(%1) \n"
"lea 0x10(%0),%0 \n"
"lea 0x20(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToAB64Lo), // %3
"m"(kShuffleARGBToAB64Hi) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
uint8_t* dst_argb,
int width) {
asm volatile(
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
"psrlw $8,%%xmm0 \n"
"psrlw $8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0,(%1) \n"
"lea 0x20(%0),%0 \n"
"lea 0x10(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
: "memory", "cc", "xmm0", "xmm1");
}
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
"movdqa %3,%%xmm2 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
"psrlw $8,%%xmm0 \n"
"psrlw $8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"pshufb %%xmm2,%%xmm0 \n"
"movdqu %%xmm0,(%1) \n"
"lea 0x20(%0),%0 \n"
"lea 0x10(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#ifdef HAS_ARGBTOAR64ROW_AVX2
void ARGBToAR64Row_AVX2(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
asm volatile(
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n"
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x20(%0),%0 \n"
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
:
: "memory", "cc", "xmm0", "xmm1");
}
#endif
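The vpermq $0xd8 matters because AVX2 unpacks operate within each 128-bit lane; pre-permuting the qwords to 0,2,1,3 makes the in-lane byte duplication land in pixel order. An intrinsics sketch of the same iteration (8 pixels; again illustrative, not the shipped code):

#include <immintrin.h>
#include <stdint.h>
static void Widen8Pixels(const uint8_t* src, uint16_t* dst) {
  __m256i v = _mm256_loadu_si256((const __m256i*)src);                   // 8 ARGB pixels
  v = _mm256_permute4x64_epi64(v, 0xd8);                                 // qwords 0,2,1,3
  _mm256_storeu_si256((__m256i*)dst, _mm256_unpacklo_epi8(v, v));        // pixels 0-3
  _mm256_storeu_si256((__m256i*)(dst + 16), _mm256_unpackhi_epi8(v, v)); // pixels 4-7
}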
#ifdef HAS_ARGBTOAB64ROW_AVX2
void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm2 \n"
"vbroadcastf128 %4,%%ymm3 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm3,%%ymm0,%%ymm1 \n"
"vpshufb %%ymm2,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x20(%0),%0 \n"
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToAB64Lo), // %3
"m"(kShuffleARGBToAB64Hi) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif
#ifdef HAS_AR64TOARGBROW_AVX2
void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
uint8_t* dst_argb,
int width) {
asm volatile(
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"vpsrlw $8,%%ymm0,%%ymm0 \n"
"vpsrlw $8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,(%1) \n"
"lea 0x40(%0),%0 \n"
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
: "memory", "cc", "xmm0", "xmm1");
}
#endif
#ifdef HAS_AB64TOARGBROW_AVX2
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm2 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"vpsrlw $8,%%ymm0,%%ymm0 \n"
"vpsrlw $8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm2,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,(%1) \n"
"lea 0x40(%0),%0 \n"
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2");
}
#endif
// clang-format off
// TODO(mraptis): Consider passing R, G, B multipliers as parameter.

View File

@ -2119,6 +2119,105 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
10, 9, 8, 11, 14, 13, 12, 15};
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
asm volatile(
"1: \n"
"vld1.8 {q0}, [%0]! \n"
"vld1.8 {q2}, [%0]! \n"
"vmov.u8 q1, q0 \n"
"vmov.u8 q3, q2 \n"
"subs %2, %2, #8 \n" // 8 processed per loop.
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
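The vst2.8 of two identical q registers interleaves them byte by byte, so every source byte lands twice in memory; on a little-endian target that is once more the v * 0x0101 widening. A NEON-intrinsics sketch (4 pixels, illustrative):

#include <arm_neon.h>
#include <stdint.h>
static void Widen4Pixels(const uint8_t* src, uint16_t* dst) {
  uint8x16x2_t v;
  v.val[0] = vld1q_u8(src);    // 4 ARGB pixels
  v.val[1] = v.val[0];         // second copy for the interleave
  vst2q_u8((uint8_t*)dst, v);  // interleaved store duplicates each byte
}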
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
"vld1.8 q4, %3 \n" // shuffler
"1: \n"
"vld1.8 {q0}, [%0]! \n"
"vld1.8 {q2}, [%0]! \n"
"vtbl.8 d2, {d0, d1}, d8 \n"
"vtbl.8 d3, {d0, d1}, d9 \n"
"vtbl.8 d6, {d4, d5}, d8 \n"
"vtbl.8 d7, {d4, d5}, d9 \n"
"vmov.u8 q0, q1 \n"
"vmov.u8 q2, q3 \n"
"subs %2, %2, #8 \n" // 8 processed per loop.
"vst2.8 {q0, q1}, [%1]! \n" // store 4 pixels
"vst2.8 {q2, q3}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
uint8_t* dst_argb,
int width) {
asm volatile(
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
"vld1.16 {q2}, [%0]! \n"
"vld1.16 {q3}, [%0]! \n"
"vshrn.u16 d0, q0, #8 \n"
"vshrn.u16 d1, q1, #8 \n"
"vshrn.u16 d4, q2, #8 \n"
"vshrn.u16 d5, q3, #8 \n"
"subs %2, %2, #8 \n" // 8 processed per loop.
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15};
void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
"vld1.8 d8, %3 \n" // shuffler
"1: \n"
"vld1.16 {q0}, [%0]! \n"
"vld1.16 {q1}, [%0]! \n"
"vld1.16 {q2}, [%0]! \n"
"vld1.16 {q3}, [%0]! \n"
"vtbl.8 d0, {d0, d1}, d8 \n"
"vtbl.8 d1, {d2, d3}, d8 \n"
"vtbl.8 d4, {d4, d5}, d8 \n"
"vtbl.8 d5, {d6, d7}, d8 \n"
"subs %2, %2, #8 \n" // 8 processed per loop.
"vst1.8 {q0}, [%1]! \n" // store 4 pixels
"vst1.8 {q2}, [%1]! \n" // store 4 pixels
"bgt 1b \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAB64ToARGB) // %3
: "cc", "memory", "q0", "q1", "q2", "q3", "q4");
}
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
asm volatile(
"vmov.u8 d6, #25 \n" // B * 0.1016 coefficient

View File

@ -1565,6 +1565,100 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
: "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23");
}
static const uvec8 kShuffleARGBToABGR = {2, 1, 0, 3, 6, 5, 4, 7,
10, 9, 8, 11, 14, 13, 12, 15};
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {
asm volatile(
"1: \n"
"ldp q0, q2, [%0], #32 \n" // load 8 pixels
"mov v1.16b, v0.16b \n"
"mov v3.16b, v2.16b \n"
"prfm pldl1keep, [%0, 448] \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"st2 {v0.16b, v1.16b}, [%1], #32 \n" // store 4 pixels
"st2 {v2.16b, v3.16b}, [%1], #32 \n" // store 4 pixels
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3");
}
void ARGBToAB64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ab64,
int width) {
asm volatile(
"ld1 {v4.16b}, %3 \n" // shuffler
"1: \n"
"ldp q0, q2, [%0], #32 \n" // load 8 pixels
"tbl v0.16b, {v0.16b}, v4.16b \n"
"tbl v2.16b, {v2.16b}, v4.16b \n"
"mov v1.16b, v0.16b \n"
"mov v3.16b, v2.16b \n"
"prfm pldl1keep, [%0, 448] \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"st2 {v0.16b, v1.16b}, [%1], #32 \n" // store 4 pixels
"st2 {v2.16b, v3.16b}, [%1], #32 \n" // store 4 pixels
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
: "m"(kShuffleARGBToABGR) // %3
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
static const uvec8 kShuffleAR64ToARGB = {1, 3, 5, 7, 9, 11, 13, 15,
17, 19, 21, 23, 25, 27, 29, 31};
void AR64ToARGBRow_NEON(const uint16_t* src_ar64,
uint8_t* dst_argb,
int width) {
asm volatile(
"ld1 {v4.16b}, %3 \n" // shuffler
"1: \n"
"ldp q0, q1, [%0], #32 \n" // load 4 pixels
"ldp q2, q3, [%0], #32 \n" // load 4 pixels
"prfm pldl1keep, [%0, 448] \n"
"tbl v0.16b, {v0.16b, v1.16b}, v4.16b \n"
"tbl v2.16b, {v2.16b, v3.16b}, v4.16b \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"stp q0, q2, [%1], #32 \n" // store 8 pixels
"b.gt 1b \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAR64ToARGB) // %3
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
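The table lookup replaces the vshrn of the 32-bit path: kShuffleAR64ToARGB selects the odd bytes {1, 3, ..., 31}, and on a little-endian target the odd byte of each uint16_t is its high byte. Worked on one channel: v = 0xABCD is stored as bytes {0xCD, 0xAB}, and index 1 picks 0xAB = v >> 8, matching AR64ToARGBRow_C.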
static const uvec8 kShuffleAB64ToARGB = {5, 3, 1, 7, 13, 11, 9, 15,
21, 19, 17, 23, 29, 27, 25, 31};
void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
uint8_t* dst_argb,
int width) {
asm volatile(
"ld1 {v4.16b}, %3 \n" // shuffler
"1: \n"
"ldp q0, q1, [%0], #32 \n" // load 4 pixels
"ldp q2, q3, [%0], #32 \n" // load 4 pixels
"prfm pldl1keep, [%0, 448] \n"
"tbl v0.16b, {v0.16b, v1.16b}, v4.16b \n"
"tbl v2.16b, {v2.16b, v3.16b}, v4.16b \n"
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"stp q0, q2, [%1], #32 \n" // store 8 pixels
"b.gt 1b \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "m"(kShuffleAB64ToARGB) // %3
: "cc", "memory", "v0", "v1", "v2", "v3", "v4");
}
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"movi v4.8b, #25 \n" // B * 0.1016 coefficient
@ -3595,8 +3689,7 @@ void MultiplyRow_16_NEON(const uint16_t* src_y,
asm volatile(
"dup v2.8h, %w2 \n"
"1: \n"
"ldp q0, q1, [%0] \n"
"add %0, %0, #32 \n"
"ldp q0, q1, [%0], #32 \n"
"prfm pldl1keep, [%0, 448] \n"
"mul v0.8h, v0.8h, v2.8h \n"
"mul v1.8h, v1.8h, v2.8h \n"
@ -3619,8 +3712,7 @@ void DivideRow_16_NEON(const uint16_t* src_y,
asm volatile(
"dup v0.8h, %w2 \n"
"1: \n"
"ldp q1, q2, [%0] \n"
"add %0, %0, #32 \n"
"ldp q1, q2, [%0], #32 \n"
"prfm pldl1keep, [%0, 448] \n"
"ushll v3.4s, v1.4h, #0 \n"
"ushll v4.4s, v2.4h, #0 \n"

View File

@ -1214,34 +1214,35 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, N, NEG, OFF) \
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, \
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB * sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideB* kHeightB); \
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, kWidth, \
NEG kHeight); \
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_B(src_argb + OFF, kStrideA, dst_argb_opt, kStrideB, \
kWidth, NEG kHeight); \
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, \
(TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
@ -1249,8 +1250,8 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, \
STRIDE_B, HEIGHT_B) \
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
@ -1258,24 +1259,25 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
src_argb[i] = (fastrand() & 0xff); \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA * sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB * sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB* kHeightB * sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
src_argb[i] = 0xfe; \
} \
memset(dst_argb_c, 123, kStrideB* kHeightB); \
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_c, kStrideB, kWidth, \
kHeight); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B(src_argb, kStrideA, dst_argb_opt, kStrideB, kWidth, \
kHeight); \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
kStrideB, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB * sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
@ -1284,77 +1286,87 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
} \
}
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, benchmark_width_ - 4, _Any, +, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, benchmark_width_, _Unaligned, +, 1) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, benchmark_width_, _Invert, -, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, benchmark_width_, _Opt, +, 0) \
TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B)
#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B) \
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
STRIDE_B, HEIGHT_B, benchmark_width_ - 4, _Any, +, 0) \
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
STRIDE_B, HEIGHT_B, benchmark_width_, _Unaligned, +, 1) \
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
STRIDE_B, HEIGHT_B, benchmark_width_, _Invert, -, 0) \
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) \
TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B)
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1)
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(AB30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AB30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
#endif
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1)
TESTATOB(AR30, uint8_t, 4, 4, 1, AB30, uint8_t, 4, 4, 1)
#endif
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1)
TESTATOB(AR30, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1)
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(AR30, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
TESTATOB(AR30, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
#endif
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, AR30, uint8_t, 4, 4, 1)
#endif
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1)
TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1)
TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1)
TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1)
TESTATOB(ABGR, 4, 4, 1, RAW, 3, 3, 1)
TESTATOB(ABGR, 4, 4, 1, RGB24, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB1555, uint8_t, 2, 2, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGB4444, uint8_t, 2, 2, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB565, uint8_t, 2, 2, 1)
#endif
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1) // 4
TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1)
TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1)
TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1)
TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1)
TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1)
TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1)
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1)
TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1)
TESTATOB(RGB24, 3, 3, 1, RGB24Mirror, 3, 3, 1)
TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, UYVY, uint8_t, 2, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, YUY2, uint8_t, 2, 4, 1) // 4
TESTATOB(ARGB1555, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(ARGB4444, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(BGRA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(I400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(I400, uint8_t, 1, 1, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(I400, uint8_t, 1, 1, 1, I400Mirror, uint8_t, 1, 1, 1)
TESTATOB(J400, uint8_t, 1, 1, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(J400, uint8_t, 1, 1, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RAW, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(RAW, uint8_t, 3, 3, 1, RGBA, uint8_t, 4, 4, 1)
TESTATOB(RAW, uint8_t, 3, 3, 1, RGB24, uint8_t, 3, 3, 1)
TESTATOB(RGB24, uint8_t, 3, 3, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(RGB24, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGB24, uint8_t, 3, 3, 1, RGB24Mirror, uint8_t, 3, 3, 1)
TESTATOB(RAW, uint8_t, 3, 3, 1, J400, uint8_t, 1, 1, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1)
TESTATOB(RGB565, uint8_t, 2, 2, 1, ARGB, uint8_t, 4, 4, 1)
#endif
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1)
TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1)
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(UYVY, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(YUY2, uint8_t, 2, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(YUY2, uint8_t, 2, 4, 1, Y, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, ABGR, uint8_t, 4, 4, 1)
TESTATOB(AR64, uint16_t, 4, 4, 1, AB64, uint16_t, 4, 4, 1)
TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
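The test macros now take elements-per-pixel (EPP) and an element type instead of bytes-per-pixel, so strides count elements and buffer sizes scale by sizeof(TYPE). One instantiation worked through:

TESTATOB(AR64, uint16_t, 4, 4, 1, ARGB, uint8_t, 4, 4, 1)
  kStrideA     = kWidth * 4                            // uint16_t elements per row
  source bytes = kStrideA * kHeightA * sizeof(uint16_t)
  dest bytes   = kStrideB * kHeightB * sizeof(uint8_t)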
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, N, NEG, OFF) \
@ -1443,35 +1455,38 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1)
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
#endif
#define TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, OFF) \
#define TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, W1280, N, NEG, \
OFF) \
TEST_F(LibYUVConvertTest, FMT_ATOB##_Symetric##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kStrideA = \
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA); \
align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
align_buffer_page_end(src_argb, \
kStrideA* kHeightA * sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA * sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_opt, kStrideA* kHeightA * sizeof(TYPE_A)); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideA* kHeightA); \
memset(dst_argb_opt, 101, kStrideA* kHeightA); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_c, kStrideA, kWidth, \
NEG kHeight); \
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
kStrideA, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_ATOB(src_argb + OFF, kStrideA, dst_argb_opt, kStrideA, kWidth, \
NEG kHeight); \
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_opt, \
kStrideA, kWidth, NEG kHeight); \
} \
MaskCpuFlags(disable_cpu_flags_); \
FMT_ATOB(dst_argb_c, kStrideA, dst_argb_c, kStrideA, kWidth, NEG kHeight); \
FMT_ATOB((TYPE_A*)dst_argb_c, kStrideA, (TYPE_A*)dst_argb_c, kStrideA, \
kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_ATOB(dst_argb_opt, kStrideA, dst_argb_opt, kStrideA, kWidth, \
NEG kHeight); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
kWidth, NEG kHeight); \
for (int i = 0; i < kStrideA * kHeightA * sizeof(TYPE_A); ++i) { \
EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
@ -1480,18 +1495,20 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTSYM(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A) \
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ - 4, _Any, +, \
0) \
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Unaligned, \
+, 1) \
TESTSYMI(FMT_ATOB, BPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, _Opt, +, 0)
#define TESTSYM(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A) \
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_ - 4, \
_Any, +, 0) \
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
_Unaligned, +, 1) \
TESTSYMI(FMT_ATOB, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, benchmark_width_, \
_Opt, +, 0)
TESTSYM(ARGBToARGB, 4, 4, 1)
TESTSYM(ARGBToBGRA, 4, 4, 1)
TESTSYM(ARGBToABGR, 4, 4, 1)
TESTSYM(BGRAToARGB, 4, 4, 1)
TESTSYM(ABGRToARGB, 4, 4, 1)
TESTSYM(ARGBToARGB, uint8_t, 4, 4, 1)
TESTSYM(ARGBToBGRA, uint8_t, 4, 4, 1)
TESTSYM(ARGBToABGR, uint8_t, 4, 4, 1)
TESTSYM(BGRAToARGB, uint8_t, 4, 4, 1)
TESTSYM(ABGRToARGB, uint8_t, 4, 4, 1)
TESTSYM(AB64ToAR64, uint16_t, 4, 4, 1)
TEST_F(LibYUVConvertTest, Test565) {
SIMD_ALIGNED(uint8_t orig_pixels[256][4]);

View File

@ -29,7 +29,7 @@ static bool TestValidFourCC(uint32_t fourcc, int bpp) {
!TestValidChar((fourcc >> 24) & 0xff)) {
return false;
}
if (bpp < 0 || bpp > 32) {
if (bpp < 0 || bpp > 64) {
return false;
}
return true;
@ -72,6 +72,8 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64));
EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64));
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));