RGB565ToI420 C version matches SIMD; the ARGB4444, RGB565 and ARGB1555 C versions mimic the AVX and Neon implementations.

Neon: move prfm after the loads in all functions. Example performance improvement:
Was
I444ToARGB_Opt (3275 ms)
I444ToNV12_Opt (1509 ms)
Now
I444ToARGB_Opt (2751 ms)
I444ToNV12_Opt (1367 ms)

Bug: libyuv:447
Change-Id: I78bf797b3600084c1eceb0be44cdbc9a575de803
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2189559
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2020-05-08 12:07:04 -07:00 committed by Commit Bot
parent 6cd1ffb1b8
commit d13db1b437
6 changed files with 437 additions and 319 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1752 Version: 1753
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1752 #define LIBYUV_VERSION 1753
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -526,9 +526,9 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
dst_v += 1; \ dst_v += 1; \
} \ } \
if (width & 1) { \ if (width & 1) { \
uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \ uint16_t ab = src_rgb0[B] + src_rgb1[B]; \
uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \ uint16_t ag = src_rgb0[G] + src_rgb1[G]; \
uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \ uint16_t ar = src_rgb0[R] + src_rgb1[R]; \
dst_u[0] = RGB2xToU(ar, ag, ab); \ dst_u[0] = RGB2xToU(ar, ag, ab); \
dst_v[0] = RGB2xToV(ar, ag, ab); \ dst_v[0] = RGB2xToV(ar, ag, ab); \
} \ } \
@ -748,17 +748,38 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
uint8_t b3 = next_rgb565[2] & 0x1f; uint8_t b3 = next_rgb565[2] & 0x1f;
uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
uint8_t r3 = next_rgb565[3] >> 3; uint8_t r3 = next_rgb565[3] >> 3;
uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
uint8_t g = (g0 + g1 + g2 + g3); b0 = (b0 << 3) | (b0 >> 2);
uint8_t r = (r0 + r1 + r2 + r3); g0 = (g0 << 2) | (g0 >> 4);
b = (b << 1) | (b >> 6); // 787 -> 888. r0 = (r0 << 3) | (r0 >> 2);
r = (r << 1) | (r >> 6); b1 = (b1 << 3) | (b1 >> 2);
dst_u[0] = RGBToU(r, g, b); g1 = (g1 << 2) | (g1 >> 4);
dst_v[0] = RGBToV(r, g, b); r1 = (r1 << 3) | (r1 >> 2);
src_rgb565 += 4; b2 = (b2 << 3) | (b2 >> 2);
next_rgb565 += 4; g2 = (g2 << 2) | (g2 >> 4);
dst_u += 1; r2 = (r2 << 3) | (r2 >> 2);
dst_v += 1; b3 = (b3 << 3) | (b3 >> 2);
g3 = (g3 << 2) | (g3 >> 4);
r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_rgb565 += 4;
next_rgb565 += 4;
dst_u += 1;
dst_v += 1;
} }
if (width & 1) { if (width & 1) {
uint8_t b0 = src_rgb565[0] & 0x1f; uint8_t b0 = src_rgb565[0] & 0x1f;
@ -767,14 +788,27 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
uint8_t b2 = next_rgb565[0] & 0x1f; uint8_t b2 = next_rgb565[0] & 0x1f;
uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
uint8_t r2 = next_rgb565[1] >> 3; uint8_t r2 = next_rgb565[1] >> 3;
uint8_t b = (b0 + b2); // 565 * 2 = 676.
uint8_t g = (g0 + g2); b0 = (b0 << 3) | (b0 >> 2);
uint8_t r = (r0 + r2); g0 = (g0 << 2) | (g0 >> 4);
b = (b << 2) | (b >> 4); // 676 -> 888 r0 = (r0 << 3) | (r0 >> 2);
g = (g << 1) | (g >> 6); b2 = (b2 << 3) | (b2 >> 2);
r = (r << 2) | (r >> 4); g2 = (g2 << 2) | (g2 >> 4);
dst_u[0] = RGBToU(r, g, b); r2 = (r2 << 3) | (r2 >> 2);
dst_v[0] = RGBToV(r, g, b);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
} }
} }
@ -798,18 +832,38 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
uint8_t b3 = next_argb1555[2] & 0x1f; uint8_t b3 = next_argb1555[2] & 0x1f;
uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2; uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
uint8_t b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
uint8_t g = (g0 + g1 + g2 + g3); b0 = (b0 << 3) | (b0 >> 2);
uint8_t r = (r0 + r1 + r2 + r3); g0 = (g0 << 3) | (g0 >> 2);
b = (b << 1) | (b >> 6); // 777 -> 888. r0 = (r0 << 3) | (r0 >> 2);
g = (g << 1) | (g >> 6); b1 = (b1 << 3) | (b1 >> 2);
r = (r << 1) | (r >> 6); g1 = (g1 << 3) | (g1 >> 2);
dst_u[0] = RGBToU(r, g, b); r1 = (r1 << 3) | (r1 >> 2);
dst_v[0] = RGBToV(r, g, b); b2 = (b2 << 3) | (b2 >> 2);
src_argb1555 += 4; g2 = (g2 << 3) | (g2 >> 2);
next_argb1555 += 4; r2 = (r2 << 3) | (r2 >> 2);
dst_u += 1; b3 = (b3 << 3) | (b3 >> 2);
dst_v += 1; g3 = (g3 << 3) | (g3 >> 2);
r3 = (r3 << 3) | (r3 >> 2);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_argb1555 += 4;
next_argb1555 += 4;
dst_u += 1;
dst_v += 1;
} }
if (width & 1) { if (width & 1) {
uint8_t b0 = src_argb1555[0] & 0x1f; uint8_t b0 = src_argb1555[0] & 0x1f;
@ -818,14 +872,27 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
uint8_t b2 = next_argb1555[0] & 0x1f; uint8_t b2 = next_argb1555[0] & 0x1f;
uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
uint8_t r2 = next_argb1555[1] >> 3; uint8_t r2 = next_argb1555[1] >> 3;
uint8_t b = (b0 + b2); // 555 * 2 = 666.
uint8_t g = (g0 + g2); b0 = (b0 << 3) | (b0 >> 2);
uint8_t r = (r0 + r2); g0 = (g0 << 3) | (g0 >> 2);
b = (b << 2) | (b >> 4); // 666 -> 888. r0 = (r0 << 3) | (r0 >> 2);
g = (g << 2) | (g >> 4); b2 = (b2 << 3) | (b2 >> 2);
r = (r << 2) | (r >> 4); g2 = (g2 << 3) | (g2 >> 2);
dst_u[0] = RGBToU(r, g, b); r2 = (r2 << 3) | (r2 >> 2);
dst_v[0] = RGBToV(r, g, b);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
} }
} }
@ -849,18 +916,38 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t b3 = next_argb4444[2] & 0x0f; uint8_t b3 = next_argb4444[2] & 0x0f;
uint8_t g3 = next_argb4444[2] >> 4; uint8_t g3 = next_argb4444[2] >> 4;
uint8_t r3 = next_argb4444[3] & 0x0f; uint8_t r3 = next_argb4444[3] & 0x0f;
uint8_t b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
uint8_t g = (g0 + g1 + g2 + g3); b0 = (b0 << 4) | b0;
uint8_t r = (r0 + r1 + r2 + r3); g0 = (g0 << 4) | g0;
b = (b << 2) | (b >> 4); // 666 -> 888. r0 = (r0 << 4) | r0;
g = (g << 2) | (g >> 4); b1 = (b1 << 4) | b1;
r = (r << 2) | (r >> 4); g1 = (g1 << 4) | g1;
dst_u[0] = RGBToU(r, g, b); r1 = (r1 << 4) | r1;
dst_v[0] = RGBToV(r, g, b); b2 = (b2 << 4) | b2;
src_argb4444 += 4; g2 = (g2 << 4) | g2;
next_argb4444 += 4; r2 = (r2 << 4) | r2;
dst_u += 1; b3 = (b3 << 4) | b3;
dst_v += 1; g3 = (g3 << 4) | g3;
r3 = (r3 << 4) | r3;
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
src_argb4444 += 4;
next_argb4444 += 4;
dst_u += 1;
dst_v += 1;
} }
if (width & 1) { if (width & 1) {
uint8_t b0 = src_argb4444[0] & 0x0f; uint8_t b0 = src_argb4444[0] & 0x0f;
@ -869,14 +956,27 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
uint8_t b2 = next_argb4444[0] & 0x0f; uint8_t b2 = next_argb4444[0] & 0x0f;
uint8_t g2 = next_argb4444[0] >> 4; uint8_t g2 = next_argb4444[0] >> 4;
uint8_t r2 = next_argb4444[1] & 0x0f; uint8_t r2 = next_argb4444[1] & 0x0f;
uint8_t b = (b0 + b2); // 444 * 2 = 555.
uint8_t g = (g0 + g2); b0 = (b0 << 4) | b0;
uint8_t r = (r0 + r2); g0 = (g0 << 4) | g0;
b = (b << 3) | (b >> 2); // 555 -> 888. r0 = (r0 << 4) | r0;
g = (g << 3) | (g >> 2); b2 = (b2 << 4) | b2;
r = (r << 3) | (r >> 2); g2 = (g2 << 4) | g2;
dst_u[0] = RGBToU(r, g, b); r2 = (r2 << 4) | r2;
dst_v[0] = RGBToV(r, g, b);
#if LIBYUV_ARGBTOUV_PAVGB
uint8_t ab = AVGB(b0, b2);
uint8_t ag = AVGB(g0, g2);
uint8_t ar = AVGB(r0, r2);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
#else
uint16_t b = b0 + b2;
uint16_t g = g0 + g2;
uint16_t r = r0 + r2;
dst_u[0] = RGB2xToU(r, g, b);
dst_v[0] = RGB2xToV(r, g, b);
#endif
} }
} }

View File

@ -1429,7 +1429,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q0, q1, q2) RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1475,7 +1475,7 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q0, q1, q2) RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1520,7 +1520,7 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"vrshr.u16 q3, q3, #1 \n" "vrshr.u16 q3, q3, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q3, q2, q1) RGBTOUV(q3, q2, q1)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1565,7 +1565,7 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q2, q1, q0) RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1610,7 +1610,7 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q0, q1, q2) RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1655,7 +1655,7 @@ void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q0, q1, q2) RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1700,7 +1700,7 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
"vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 32 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q2, q1, q0) RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
@ -1886,21 +1886,12 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
"vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.
"vrshr.u16 q4, q4, #1 \n" // 2x average "vrshr.u16 q0, q4, #1 \n" // 2x average
"vrshr.u16 q5, q5, #1 \n" "vrshr.u16 q1, q5, #1 \n"
"vrshr.u16 q6, q6, #1 \n" "vrshr.u16 q2, q6, #1 \n"
"subs %4, %4, #16 \n" // 16 processed per loop. "subs %4, %4, #16 \n" // 16 processed per loop.
"vmul.s16 q8, q4, q10 \n" // B RGBTOUV(q0, q1, q2)
"vmls.s16 q8, q5, q11 \n" // G
"vmls.s16 q8, q6, q12 \n" // R
"vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
"vmul.s16 q9, q6, q10 \n" // R
"vmls.s16 q9, q5, q14 \n" // G
"vmls.s16 q9, q4, q13 \n" // B
"vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
"vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
"vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n" "bgt 1b \n"

File diff suppressed because it is too large Load Diff

View File

@ -801,7 +801,7 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif #endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ #define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, DIFF, N, NEG, OFF) \ W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@ -833,14 +833,12 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
} \ } \
for (int i = 0; i < kHeight; ++i) { \ for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \ for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(static_cast<int>(dst_y_c[i * kWidth + j]), \ EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
static_cast<int>(dst_y_opt[i * kWidth + j])); \
} \ } \
} \ } \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \
for (int j = 0; j < kStrideUV; ++j) { \ for (int j = 0; j < kStrideUV; ++j) { \
EXPECT_NEAR(static_cast<int>(dst_uv_c[i * kStrideUV + j]), \ EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \
static_cast<int>(dst_uv_opt[i * kStrideUV + j]), DIFF); \
} \ } \
} \ } \
free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_y_c); \
@ -850,39 +848,38 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(src_argb); \
} }
#define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ #define TESTATOPLANAR(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
DIFF) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_ - 4, DIFF, _Any, +, 0) \ benchmark_width_ - 4, _Any, +, 0) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Unaligned, +, 1) \ benchmark_width_, _Unaligned, +, 1) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Invert, -, 0) \ benchmark_width_, _Invert, -, 0) \
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \ TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Opt, +, 0) benchmark_width_, _Opt, +, 0)
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 0) TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 0) TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 0) TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 0) TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, 0) TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, 0) TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST #ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 9) TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2)
#endif #endif
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 0) TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2)
TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 0) TESTATOPLANAR(I400, 1, 1, I420, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 0) TESTATOPLANAR(J400, 1, 1, J420, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 0) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 0) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 0) TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 0) TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 0) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 0) TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 0) TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 0) TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \ #define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \ SUBSAMP_Y, W1280, N, NEG, OFF) \
@ -2201,7 +2198,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
} }
#define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ #define TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C) \ YALIGN, W1280, N, NEG, OFF, FMT_C, BPP_C) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@ -2232,7 +2229,6 @@ TEST_F(LibYUVConvertTest, TestDither) {
src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \ src_y + OFF, kWidth, src_u + OFF, kStrideUV, src_v + OFF, kStrideUV, \
dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \ dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \
} \ } \
int max_diff = 0; \
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \
@ -2243,13 +2239,8 @@ TEST_F(LibYUVConvertTest, TestDither) {
FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \
kWidth * BPP_C, kWidth, kHeight); \ kWidth * BPP_C, kWidth, kHeight); \
for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
int abs_diff = abs(static_cast<int>(dst_argb32_c[i]) - \ EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \
static_cast<int>(dst_argb32_opt[i])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \ } \
EXPECT_LE(max_diff, DIFF); \
free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \ free_aligned_buffer_page_end(src_u); \
free_aligned_buffer_page_end(src_v); \ free_aligned_buffer_page_end(src_v); \
@ -2259,22 +2250,21 @@ TEST_F(LibYUVConvertTest, TestDither) {
free_aligned_buffer_page_end(dst_argb32_opt); \ free_aligned_buffer_page_end(dst_argb32_opt); \
} }
#define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ #define TESTPLANARTOBD(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, DIFF, FMT_C, BPP_C) \ YALIGN, FMT_C, BPP_C) \
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, \ YALIGN, benchmark_width_ - 4, _Any, +, 0, FMT_C, BPP_C) \
BPP_C) \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ YALIGN, benchmark_width_, _Unaligned, +, 1, FMT_C, BPP_C) \
YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
BPP_C) \ YALIGN, benchmark_width_, _Invert, -, 0, FMT_C, BPP_C) \
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C) \ YALIGN, benchmark_width_, _Opt, +, 0, FMT_C, BPP_C)
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
#ifdef LITTLE_ENDIAN_ONLY_TEST #ifdef LITTLE_ENDIAN_ONLY_TEST
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4) TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
#endif #endif
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \ #define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
TEST_F(LibYUVConvertTest, NAME) { \ TEST_F(LibYUVConvertTest, NAME) { \
const int kWidth = benchmark_width_; \ const int kWidth = benchmark_width_; \