Fix MergeAR64Plane on odd width

R=fbarchard@chromium.org

Bug: libyuv:898
Change-Id: I031e008ea91baba1c7598efa0eda70750cbfce85
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2810066
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Yuan Tong 2021-04-07 19:50:03 +08:00 committed by Frank Barchard
parent 2870320ac6
commit 2cd098f83b
3 changed files with 486 additions and 350 deletions

View File

@ -985,6 +985,139 @@ void MergeRGBPlane(const uint8_t* src_r,
}
}
LIBYUV_NOINLINE
void SplitARGBPlaneAlpha(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_r,
int dst_stride_r,
uint8_t* dst_g,
int dst_stride_g,
uint8_t* dst_b,
int dst_stride_b,
uint8_t* dst_a,
int dst_stride_a,
int width,
int height) {
int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) =
SplitARGBRow_C;
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
dst_stride_a = 0;
}
#if defined(HAS_SPLITARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SplitARGBRow = SplitARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
SplitARGBRow = SplitARGBRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SplitARGBRow = SplitARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
SplitARGBRow = SplitARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SplitARGBRow = SplitARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
SplitARGBRow = SplitARGBRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SplitARGBRow = SplitARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SplitARGBRow = SplitARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
dst_r += dst_stride_r;
dst_g += dst_stride_g;
dst_b += dst_stride_b;
dst_a += dst_stride_a;
src_argb += src_stride_argb;
}
}
LIBYUV_NOINLINE
void SplitARGBPlaneOpaque(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_r,
int dst_stride_r,
uint8_t* dst_g,
int dst_stride_g,
uint8_t* dst_b,
int dst_stride_b,
int width,
int height) {
int y;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C;
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
}
#if defined(HAS_SPLITXRGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SplitXRGBRow = SplitXRGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
SplitXRGBRow = SplitXRGBRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
SplitXRGBRow = SplitXRGBRow_SSSE3;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SplitXRGBRow = SplitXRGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
SplitXRGBRow = SplitXRGBRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SplitXRGBRow = SplitXRGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SplitXRGBRow = SplitXRGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
dst_r += dst_stride_r;
dst_g += dst_stride_g;
dst_b += dst_stride_b;
src_argb += src_stride_argb;
}
}
LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
int src_stride_argb,
@ -998,138 +1131,142 @@ void SplitARGBPlane(const uint8_t* src_argb,
int dst_stride_a,
int width,
int height) {
int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) =
SplitARGBRow_C;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C;
if (height < 0) {
height = -height;
dst_r = dst_r + (height - 1) * dst_stride_r;
dst_g = dst_g + (height - 1) * dst_stride_g;
dst_b = dst_b + (height - 1) * dst_stride_b;
dst_a = dst_a + (height - 1) * dst_stride_a;
dst_stride_r = -dst_stride_r;
dst_stride_g = -dst_stride_g;
dst_stride_b = -dst_stride_b;
dst_stride_a = -dst_stride_a;
}
if (dst_a == NULL) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_r = dst_r + (height - 1) * dst_stride_r;
dst_g = dst_g + (height - 1) * dst_stride_g;
dst_b = dst_b + (height - 1) * dst_stride_b;
dst_stride_r = -dst_stride_r;
dst_stride_g = -dst_stride_g;
dst_stride_b = -dst_stride_b;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
dst_stride_a = 0;
}
#if defined(HAS_SPLITXRGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SplitXRGBRow = SplitXRGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
SplitXRGBRow = SplitXRGBRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
SplitXRGBRow = SplitXRGBRow_SSSE3;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SplitXRGBRow = SplitXRGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
SplitXRGBRow = SplitXRGBRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SplitXRGBRow = SplitXRGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SplitXRGBRow = SplitXRGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
dst_r += dst_stride_r;
dst_g += dst_stride_g;
dst_b += dst_stride_b;
src_argb += src_stride_argb;
}
SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
dst_stride_g, dst_b, dst_stride_b, width, height);
} else {
if (height < 0) {
height = -height;
dst_r = dst_r + (height - 1) * dst_stride_r;
dst_g = dst_g + (height - 1) * dst_stride_g;
dst_b = dst_b + (height - 1) * dst_stride_b;
dst_a = dst_a + (height - 1) * dst_stride_a;
dst_stride_r = -dst_stride_r;
dst_stride_g = -dst_stride_g;
dst_stride_b = -dst_stride_b;
dst_stride_a = -dst_stride_a;
}
SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
width, height);
}
}
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width &&
dst_stride_a == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
dst_stride_a = 0;
}
// Merges separate R, G, B and A planes into interleaved ARGB rows by calling
// the selected MergeARGBRow function once per row, then advancing each source
// pointer and the destination pointer by its own stride.
// When all four source strides equal width and dst_stride_argb equals
// width * 4, the rows are coalesced into one row of width * height pixels.
// Negative heights are not flipped here; the merge loop only runs while
// y < height.
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// stripped. The SplitARGBRow selection blocks and the SplitARGBRow loop below
// use names this function never declares (SplitARGBRow, src_argb, dst_r, ...),
// so they are presumably removed lines of the old SplitARGBPlane body rather
// than part of this function -- confirm against the real source.
LIBYUV_NOINLINE
void MergeARGBPlaneAlpha(const uint8_t* src_r,
int src_stride_r,
const uint8_t* src_g,
int src_stride_g,
const uint8_t* src_b,
int src_stride_b,
const uint8_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a,
uint8_t* dst_argb, int width) = MergeARGBRow_C;
#if defined(HAS_SPLITARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SplitARGBRow = SplitARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
SplitARGBRow = SplitARGBRow_SSE2;
}
// Coalesce rows when every source plane and the destination are contiguous.
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_argb = 0;
}
// Each enabled MERGEARGBROW block overrides the row function chosen before it.
#if defined(HAS_MERGEARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeARGBRow = MergeARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
MergeARGBRow = MergeARGBRow_SSE2;
}
}
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SplitARGBRow = SplitARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
SplitARGBRow = SplitARGBRow_SSSE3;
}
#if defined(HAS_MERGEARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeARGBRow = MergeARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeARGBRow = MergeARGBRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
SplitARGBRow = SplitARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
SplitARGBRow = SplitARGBRow_AVX2;
}
}
#endif
#if defined(HAS_SPLITARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SplitARGBRow = SplitARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SplitARGBRow = SplitARGBRow_NEON;
}
#if defined(HAS_MERGEARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeARGBRow = MergeARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
MergeARGBRow = MergeARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
dst_r += dst_stride_r;
dst_g += dst_stride_g;
dst_b += dst_stride_b;
dst_a += dst_stride_a;
src_argb += src_stride_argb;
// Merge loop: the alpha source pointer is advanced together with R, G and B.
for (y = 0; y < height; ++y) {
MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
src_a += src_stride_a;
dst_argb += dst_stride_argb;
}
}
LIBYUV_NOINLINE
void MergeARGBPlaneOpaque(const uint8_t* src_r,
int src_stride_r,
const uint8_t* src_g,
int src_stride_g,
const uint8_t* src_b,
int src_stride_b,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height) {
int y;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) =
MergeXRGBRow_C;
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
}
#if defined(HAS_MERGEXRGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeXRGBRow = MergeXRGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
MergeXRGBRow = MergeXRGBRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXRGBRow = MergeXRGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXRGBRow = MergeXRGBRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXRGBRow = MergeXRGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
MergeXRGBRow = MergeXRGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
}
LIBYUV_API
@ -1145,104 +1282,25 @@ void MergeARGBPlane(const uint8_t* src_r,
int dst_stride_argb,
int width,
int height) {
int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a,
uint8_t* dst_argb, int width) = MergeARGBRow_C;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) =
MergeXRGBRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
if (src_a == NULL) {
// Coalesce rows.
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
}
#if defined(HAS_MERGEXRGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeXRGBRow = MergeXRGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
MergeXRGBRow = MergeXRGBRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXRGBRow = MergeXRGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXRGBRow = MergeXRGBRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXRGBRow = MergeXRGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
MergeXRGBRow = MergeXRGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, dst_argb, dst_stride_argb, width,
height);
} else {
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && src_stride_a == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_argb = 0;
}
#if defined(HAS_MERGEARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeARGBRow = MergeARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
MergeARGBRow = MergeARGBRow_SSE2;
}
}
#endif
#if defined(HAS_MERGEARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeARGBRow = MergeARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeARGBRow = MergeARGBRow_AVX2;
}
}
#endif
#if defined(HAS_MERGEARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeARGBRow = MergeARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
MergeARGBRow = MergeARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, src_a, src_stride_a, dst_argb,
dst_stride_argb, width, height);
}
}
// TODO(yuan): Support 2 bit alpha channel.
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
int src_stride_r,
@ -1306,6 +1364,110 @@ void MergeXR30Plane(const uint16_t* src_r,
}
}
LIBYUV_NOINLINE
static void MergeAR64PlaneAlpha(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height,
int depth) {
int y;
void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, const uint16_t* src_a,
uint16_t* dst_argb, int depth, int width) =
MergeAR64Row_C;
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_ar64 = 0;
}
#if defined(HAS_MERGEAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeAR64Row = MergeAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeAR64Row = MergeAR64Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeAR64Row = MergeAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeAR64Row = MergeAR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
src_a += src_stride_a;
dst_ar64 += dst_stride_ar64;
}
}
LIBYUV_NOINLINE
static void MergeAR64PlaneOpaque(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height,
int depth) {
int y;
void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, uint16_t* dst_argb, int depth,
int width) = MergeXR64Row_C;
// Coalesce rows.
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
}
#if defined(HAS_MERGEXR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXR64Row = MergeXR64Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXR64Row = MergeXR64Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEXR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXR64Row = MergeXR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeXR64Row = MergeXR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_ar64 += dst_stride_ar64;
}
}
LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
int src_stride_r,
@ -1320,87 +1482,126 @@ void MergeAR64Plane(const uint16_t* src_r,
int width,
int height,
int depth) {
int y;
void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, const uint16_t* src_a,
uint16_t* dst_argb, int depth, int width) =
MergeAR64Row_C;
void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, uint16_t* dst_argb, int depth,
int width) = MergeXR64Row_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64;
dst_stride_ar64 = -dst_stride_ar64;
}
if (src_a == NULL) {
// Coalesce rows.
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
}
#if defined(HAS_MERGEXR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXR64Row = MergeXR64Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXR64Row = MergeXR64Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEXR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXR64Row = MergeXR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeXR64Row = MergeXR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_ar64 += dst_stride_ar64;
}
MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, dst_ar64, dst_stride_ar64, width, height,
depth);
} else {
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && src_stride_a == width &&
dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_ar64 = 0;
}
#if defined(HAS_MERGEAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeAR64Row = MergeAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeAR64Row = MergeAR64Row_AVX2;
}
MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, src_a, src_stride_a, dst_ar64,
dst_stride_ar64, width, height, depth);
}
}
// Merges separate 16-bit R, G, B and A planes into 8-bit interleaved ARGB rows
// by calling the selected MergeARGB16To8Row function once per row, passing
// depth through unchanged, then advancing every source pointer and the
// destination pointer by its own stride.
// When all four source strides equal width and dst_stride_argb equals
// width * 4, the rows are coalesced into one row of width * height pixels.
// Negative heights are not flipped here; the merge loop only runs while
// y < height.
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// stripped. The MergeAR64Row selection block and the MergeAR64Row loop below
// use names this function never declares (MergeAR64Row, dst_ar64, ...), so
// they are presumably removed lines of the old MergeAR64Plane body rather
// than part of this function -- confirm against the real source.
LIBYUV_NOINLINE
static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
const uint16_t* src_a,
int src_stride_a,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int depth) {
int y;
void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, const uint16_t* src_a,
uint8_t* dst_argb, int depth, int width) =
MergeARGB16To8Row_C;
// Coalesce rows when every source plane and the destination are contiguous.
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_argb = 0;
}
// Each enabled MERGEARGB16TO8ROW block overrides the row function chosen
// before it.
#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeAR64Row = MergeAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeAR64Row = MergeAR64Row_NEON;
}
#if defined(HAS_MERGEARGB16TO8ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeARGB16To8Row = MergeARGB16To8Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_ar64 += dst_stride_ar64;
// Merge loop: the alpha source pointer is advanced together with R, G and B.
for (y = 0; y < height; ++y) {
MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
src_a += src_stride_a;
dst_argb += dst_stride_argb;
}
}
LIBYUV_NOINLINE
static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
int src_stride_r,
const uint16_t* src_g,
int src_stride_g,
const uint16_t* src_b,
int src_stride_b,
uint8_t* dst_argb,
int dst_stride_argb,
int width,
int height,
int depth) {
int y;
void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, uint8_t* dst_argb, int depth,
int width) = MergeXRGB16To8Row_C;
// Coalesce rows.
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
}
#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
}
LIBYUV_API
@ -1417,86 +1618,21 @@ void MergeARGB16To8Plane(const uint16_t* src_r,
int width,
int height,
int depth) {
int y;
void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, const uint16_t* src_a,
uint8_t* dst_argb, int depth, int width) =
MergeARGB16To8Row_C;
void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
const uint16_t* src_b, uint8_t* dst_argb, int depth,
int width) = MergeXRGB16To8Row_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
if (src_a == NULL) {
// Coalesce rows.
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
}
#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, dst_argb, dst_stride_argb, width,
height, depth);
} else {
if (src_stride_r == width && src_stride_g == width &&
src_stride_b == width && src_stride_a == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_r = src_stride_g = src_stride_b = src_stride_a =
dst_stride_argb = 0;
}
#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
}
}
#endif
#if defined(HAS_MERGEARGB16TO8ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
MergeARGB16To8Row = MergeARGB16To8Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
src_r += src_stride_r;
src_g += src_stride_g;
src_b += src_stride_b;
dst_argb += dst_stride_argb;
}
MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
src_stride_b, src_a, src_stride_a, dst_argb,
dst_stride_argb, width, height, depth);
}
}

View File

@ -200,15 +200,15 @@ ANY41CT(I410AlphaToARGBRow_Any_AVX2,
memcpy(temp + 32, b_buf + n, r * SBPP); \
memcpy(temp + 48, a_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, temp + 48, out, depth, MASK + 1); \
memcpy(dst_ptr + n * BPP, out, r * BPP); \
memcpy((uint8_t *)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEAR64ROW_AVX2
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 4, 15)
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEAR64ROW_NEON
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEARGB16TO8ROW_AVX2
@ -490,7 +490,7 @@ ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
memcpy(temp + 16, g_buf + n, r * SBPP); \
memcpy(temp + 32, b_buf + n, r * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, out, depth, MASK + 1); \
memcpy(dst_ptr + n * BPP, out, r * BPP); \
memcpy((uint8_t *)dst_ptr + n * BPP, out, r * BPP); \
}
#ifdef HAS_MERGEXR30ROW_AVX2
@ -509,11 +509,11 @@ ANY31PT(MergeXR30Row_10_Any_NEON,
#endif
#ifdef HAS_MERGEXR64ROW_AVX2
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 4, 15)
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif
#ifdef HAS_MERGEXR64ROW_NEON
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 4, 7)
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif
#ifdef HAS_MERGEXRGB16TO8ROW_AVX2

View File

@ -3152,7 +3152,7 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kPixels = (kWidth * benchmark_height_ + 15) & ~15; \
const int kPixels = kWidth * benchmark_height_; \
align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \