Pass a yuvconstants parameter to all YUV to RGB conversion functions.

R=harryjin@google.com
BUG=libyuv:488

Review URL: https://codereview.chromium.org/1363503002 .
This commit is contained in:
Frank Barchard 2015-09-22 10:26:03 -07:00
parent 62c49dc811
commit f96890a0be
11 changed files with 1522 additions and 1811 deletions

File diff suppressed because it is too large Load Diff

View File

@ -56,6 +56,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I444ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@ -103,7 +104,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@ -124,6 +125,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I444ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
@ -171,7 +173,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@ -192,6 +194,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@ -248,7 +251,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@ -269,6 +272,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I411ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@ -316,7 +320,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@ -338,6 +342,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
@ -436,7 +441,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_argb, width);
ARGBAttenuateRow(dst_argb, dst_argb, width);
dst_argb += dst_stride_argb;
@ -462,6 +467,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
@ -560,7 +566,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
dst_abgr += dst_stride_abgr;
@ -639,7 +645,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
J400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
@ -766,7 +772,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
@ -816,7 +822,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
@ -866,7 +872,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb ||
width <= 0 || height == 0) {
@ -925,7 +931,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
int width, int height) {
int y;
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
int pix) = ARGB1555ToARGBRow_C;
int width) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -983,7 +989,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
int width, int height) {
int y;
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
int pix) = ARGB4444ToARGBRow_C;
int width) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -1044,6 +1050,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@ -1081,7 +1088,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, width);
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@ -1098,10 +1105,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*NV21ToARGBRow)(const uint8* y_buf,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV21ToARGBRow_C;
struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -1112,33 +1120,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV21TOARGBROW_SSSE3)
#if defined(HAS_NV12TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_NV21TOARGBROW_AVX2)
#if defined(HAS_NV12TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
NV21ToARGBRow = NV21ToARGBRow_AVX2;
NV12ToARGBRow = NV12ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
#if defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_NEON;
NV12ToARGBRow = NV12ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_uv, dst_argb, width);
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@ -1157,6 +1165,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb ||
width <= 0 || height == 0) {
@ -1194,14 +1203,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
&kYuvConstants, width);
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
dst_argb + dst_stride_argb, width);
dst_argb + dst_stride_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb * 2;
src_m420 += src_stride_m420 * 3;
}
if (height & 1) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
&kYuvConstants, width);
}
return 0;
}
@ -1212,7 +1223,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
void (*YUY2ToARGBRow)(const uint8* src_yuy2,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb ||
width <= 0 || height == 0) {
@ -1256,7 +1270,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
}
#endif
for (y = 0; y < height; ++y) {
YUY2ToARGBRow(src_yuy2, dst_argb, width);
YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
@ -1269,7 +1283,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
void (*UYVYToARGBRow)(const uint8* src_uyvy,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb ||
width <= 0 || height == 0) {
@ -1313,7 +1330,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
}
#endif
for (y = 0; y < height; ++y) {
UYVYToARGBRow(src_uyvy, dst_argb, width);
UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
@ -1328,11 +1345,12 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J422ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToARGBRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -1343,42 +1361,42 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_J422TOARGBROW_SSSE3)
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOARGBROW_AVX2)
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToARGBRow = J422ToARGBRow_AVX2;
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOARGBROW_NEON)
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToARGBRow = J422ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@ -1397,11 +1415,12 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*J422ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToARGBRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
@ -1422,42 +1441,42 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_J422TOARGBROW_SSSE3)
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOARGBROW_AVX2)
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToARGBRow = J422ToARGBRow_AVX2;
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOARGBROW_NEON)
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToARGBRow = J422ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToARGBRow = J422ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@ -1474,11 +1493,12 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*J422ToABGRRow)(const uint8* y_buf,
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToABGRRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
@ -1489,42 +1509,42 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_J422TOABGRROW_SSSE3)
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToABGRRow = J422ToABGRRow_SSSE3;
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOABGRROW_AVX2)
#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToABGRRow = J422ToABGRRow_Any_AVX2;
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToABGRRow = J422ToABGRRow_AVX2;
I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOABGRROW_NEON)
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToABGRRow = J422ToABGRRow_Any_NEON;
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToABGRRow = J422ToABGRRow_NEON;
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@ -1543,11 +1563,12 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*J422ToABGRRow)(const uint8* y_buf,
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = J422ToABGRRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
width <= 0 || height == 0) {
@ -1568,42 +1589,42 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
}
#if defined(HAS_J422TOABGRROW_SSSE3)
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
J422ToABGRRow = J422ToABGRRow_SSSE3;
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_J422TOABGRROW_AVX2)
#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
J422ToABGRRow = J422ToABGRRow_Any_AVX2;
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
J422ToABGRRow = J422ToABGRRow_AVX2;
I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_J422TOABGRROW_NEON)
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
J422ToABGRRow = J422ToABGRRow_Any_NEON;
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
J422ToABGRRow = J422ToABGRRow_NEON;
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@ -1620,11 +1641,12 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*H422ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = H422ToARGBRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -1635,42 +1657,42 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_H422TOARGBROW_SSSE3)
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
H422ToARGBRow = H422ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_H422TOARGBROW_AVX2)
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
H422ToARGBRow = H422ToARGBRow_Any_AVX2;
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
H422ToARGBRow = H422ToARGBRow_AVX2;
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_H422TOARGBROW_NEON)
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
H422ToARGBRow = H422ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
H422ToARGBRow = H422ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@ -1689,11 +1711,12 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*H422ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = H422ToARGBRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
@ -1714,42 +1737,42 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_H422TOARGBROW_SSSE3)
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
H422ToARGBRow = H422ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_H422TOARGBROW_AVX2)
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
H422ToARGBRow = H422ToARGBRow_Any_AVX2;
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
H422ToARGBRow = H422ToARGBRow_AVX2;
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_H422TOARGBROW_NEON)
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
H422ToARGBRow = H422ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
H422ToARGBRow = H422ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@ -1766,11 +1789,12 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*H422ToABGRRow)(const uint8* y_buf,
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = H422ToABGRRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
@ -1781,42 +1805,42 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_H422TOABGRROW_SSSE3)
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
H422ToABGRRow = H422ToABGRRow_SSSE3;
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_H422TOABGRROW_AVX2)
#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
H422ToABGRRow = H422ToABGRRow_Any_AVX2;
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
H422ToABGRRow = H422ToABGRRow_AVX2;
I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_H422TOABGRROW_NEON)
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
H422ToABGRRow = H422ToABGRRow_Any_NEON;
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
H422ToABGRRow = H422ToABGRRow_NEON;
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@ -1835,11 +1859,12 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
void (*H422ToABGRRow)(const uint8* y_buf,
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = H422ToABGRRow_C;
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
width <= 0 || height == 0) {
@ -1860,42 +1885,42 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
}
#if defined(HAS_H422TOABGRROW_SSSE3)
#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
H422ToABGRRow = H422ToABGRRow_SSSE3;
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_H422TOABGRROW_AVX2)
#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
H422ToABGRRow = H422ToABGRRow_Any_AVX2;
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
H422ToABGRRow = H422ToABGRRow_AVX2;
I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_H422TOABGRROW_NEON)
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
H422ToABGRRow = H422ToABGRRow_Any_NEON;
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
H422ToABGRRow = H422ToABGRRow_NEON;
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;

View File

@ -462,6 +462,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
@ -508,7 +509,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@ -531,6 +532,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v || !dst_bgra ||
width <= 0 || height == 0) {
@ -577,7 +579,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
@ -600,6 +602,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
@ -637,7 +640,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@ -660,6 +663,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
@ -697,7 +701,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
@ -720,6 +724,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
@ -757,7 +762,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
@ -780,6 +785,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToRAWRow_C;
if (!src_y || !src_u || !src_v || !dst_raw ||
width <= 0 || height == 0) {
@ -817,7 +823,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width);
dst_raw += dst_stride_raw;
src_y += src_stride_y;
if (y & 1) {
@ -840,6 +846,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) {
@ -877,7 +884,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width);
dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y;
if (y & 1) {
@ -901,6 +908,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) {
@ -938,7 +946,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width);
dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y;
if (y & 1) {
@ -961,6 +969,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
@ -998,7 +1007,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@ -1029,6 +1038,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
@ -1105,7 +1115,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
// Allocate a row of argb.
align_buffer_64(row_argb, width * 4);
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
dst_rgb565 += dst_stride_rgb565;

View File

@ -287,9 +287,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
int width, int height) {
int y;
void (*YUY2ToUV422Row)(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix) =
uint8* dst_u, uint8* dst_v, int width) =
YUY2ToUV422Row_C;
void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
@ -359,10 +359,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
int width, int height) {
int y;
void (*UYVYToUV422Row)(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix) =
uint8* dst_u, uint8* dst_v, int width) =
UYVYToUV422Row_C;
void (*UYVYToYRow)(const uint8* src_uyvy,
uint8* dst_y, int pix) = UYVYToYRow_C;
uint8* dst_y, int width) = UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@ -790,6 +790,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v ||
!dst_bgra ||
@ -846,7 +847,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
src_u += src_stride_u;
@ -867,6 +868,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
@ -914,7 +916,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@ -935,6 +937,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v ||
!dst_rgba ||
@ -982,7 +985,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
src_u += src_stride_u;
@ -1001,6 +1004,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = NV12ToRGB565Row_C;
if (!src_y || !src_uv || !dst_rgb565 ||
width <= 0 || height == 0) {
@ -1038,7 +1042,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@ -1055,10 +1059,11 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
int y;
void (*NV21ToRGB565Row)(const uint8* y_buf,
void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* src_vu,
uint8* rgb_buf,
int width) = NV21ToRGB565Row_C;
struct YuvConstants* yuvconstants,
int width) = NV12ToRGB565Row_C;
if (!src_y || !src_vu || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
@ -1069,33 +1074,33 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
dst_stride_rgb565 = -dst_stride_rgb565;
}
#if defined(HAS_NV21TORGB565ROW_SSSE3)
#if defined(HAS_NV12TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
}
}
#endif
#if defined(HAS_NV21TORGB565ROW_AVX2)
#if defined(HAS_NV12TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
}
}
#endif
#if defined(HAS_NV21TORGB565ROW_NEON)
#if defined(HAS_NV12TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV21ToRGB565Row = NV21ToRGB565Row_NEON;
NV12ToRGB565Row = NV12ToRGB565Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@ -1110,7 +1115,7 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value) {
int y;
void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
@ -1186,7 +1191,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 value) {
int y;
void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
if (!dst_argb ||
width <= 0 || height == 0 ||
dst_x < 0 || dst_y < 0) {
@ -1909,7 +1914,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
const uint8* shuffler, int width, int height) {
int y;
void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
const uint8* shuffler, int pix) = ARGBShuffleRow_C;
const uint8* shuffler, int width) = ARGBShuffleRow_C;
if (!src_bgra || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@ -1976,7 +1981,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
const uint8* src_sobely,
uint8* dst, int width)) {
int y;
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
ARGBToYJRow_C;
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
@ -2360,8 +2365,8 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
int width, int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUVRow_C;
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
@ -2464,8 +2469,8 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
int width, int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUVRow_C;
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;

View File

@ -40,103 +40,9 @@ extern "C" {
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7)
ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7)
ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I444TOABGRROW_SSSE3
ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TORAWROW_AVX2
ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
#endif
#ifdef HAS_J422TOARGBROW_AVX2
ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_J422TOABGRROW_AVX2
ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_H422TOARGBROW_AVX2
ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_H422TOABGRROW_AVX2
ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOBGRAROW_AVX2
ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOABGRROW_AVX2
ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I444TOABGRROW_AVX2
ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I411TOARGBROW_AVX2
ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_J422TOARGBROW_NEON
ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_J422TOABGRROW_NEON
ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_H422TOARGBROW_NEON
ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_H422TOABGRROW_NEON
ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
@ -144,7 +50,97 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#undef ANY31
#undef ANY31C
// Any 3 planes to 1 with yuvconstants
//
// ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) defines a function
// NAMEANY that wraps the row function ANY_SIMD so it can be called with any
// width:
//   n = width & ~MASK  - leading pixels handed straight to ANY_SIMD
//                        (that call is skipped when n is 0).
//   r = width & MASK   - leftover pixels, routed through the scratch buffer.
// Scratch layout (temp, 64 * 4 bytes): Y at +0, U at +64, V at +128, and the
// converted output at +192. Only the three input regions are zeroed first.
// The leftover Y/U/V bytes are copied in, ANY_SIMD is run on MASK + 1 pixels
// of scratch (this second call is unconditional, even when r is 0), and then
// SS(r, DUVSHIFT) * BPP bytes of the result are copied out to
// dst_ptr + (n >> DUVSHIFT) * BPP.
// UVSHIFT scales the chroma offsets and lengths (n >> UVSHIFT, SS(r, UVSHIFT)).
// yuvconstants is not inspected here; it is forwarded unchanged to ANY_SIMD.
// NOTE(review): SS() is defined outside this view - presumably a round-up
// shift; confirm. MASK is presumably (pixels per SIMD step - 1); the
// instantiations visible here pass 7 or 15.
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
uint8* dst_ptr, struct YuvConstants* yuvconstants, \
int width) { \
SIMD_ALIGNED(uint8 temp[64 * 4]); \
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n, r); \
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
#endif // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I444TOABGRROW_SSSE3
ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TORAWROW_AVX2
ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOBGRAROW_AVX2
ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOABGRROW_AVX2
ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I444TOABGRROW_AVX2
ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I411TOARGBROW_AVX2
ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#undef ANY31C
// Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
@ -164,32 +160,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
@ -249,6 +219,46 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#undef ANY21
// Any 2 planes to 1 with yuvconstants
//
// ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) defines a
// function NAMEANY that wraps the two-plane row function ANY_SIMD so it can
// be called with any width:
//   n = width & ~MASK  - leading pixels handed straight to ANY_SIMD
//                        (that call is skipped when n is 0).
//   r = width & MASK   - leftover pixels, routed through the scratch buffer.
// Scratch layout (temp, 64 * 3 bytes): first plane at +0, second (UV) plane
// at +64, converted output at +128. Only the two input regions are zeroed.
// The leftover r * SBPP bytes of the first plane and SS(r, UVSHIFT) * SBPP2
// bytes of the second plane are copied in, ANY_SIMD is run on MASK + 1
// pixels of scratch (unconditionally, even when r is 0), and r * BPP bytes
// of the result are copied out to dst_ptr + n * BPP.
// yuvconstants is not inspected here; it is forwarded unchanged to ANY_SIMD.
// NOTE(review): SS() is defined outside this view - presumably a round-up
// shift; confirm. MASK is presumably (pixels per SIMD step - 1); the
// instantiations visible here pass 7 or 15.
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
uint8* dst_ptr, struct YuvConstants* yuvconstants, \
int width) { \
SIMD_ALIGNED(uint8 temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
@ -297,9 +307,7 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
@ -315,10 +323,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
@ -327,8 +331,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
@ -426,6 +428,35 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#undef ANY11
// Any 1 to 1 with yuvconstants
//
// ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) defines a function
// NAMEANY that wraps the single-source row function ANY_SIMD so it can be
// called with any width:
//   n = width & ~MASK  - leading pixels handed straight to ANY_SIMD
//                        (that call is skipped when n is 0).
//   r = width & MASK   - leftover pixels, routed through the scratch buffer.
// Scratch layout (temp, 128 * 2 bytes): source at +0, converted output at
// +128. Only the source half is zeroed.
// SS(r, UVSHIFT) * SBPP leftover source bytes, read from
// src_ptr + (n >> UVSHIFT) * SBPP, are copied in; ANY_SIMD is run on
// MASK + 1 pixels of scratch (unconditionally, even when r is 0); then
// r * BPP bytes of the result are copied out to dst_ptr + n * BPP.
// yuvconstants is not inspected here; it is forwarded unchanged to ANY_SIMD.
// NOTE(review): SS() is defined outside this view - presumably a round-up
// shift; confirm. MASK is presumably (pixels per SIMD step - 1); the
// instantiations visible here pass 7, 15 or 31.
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8 temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
#if defined(HAS_YUY2TOARGBROW_SSSE3)
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
#endif
#if defined(HAS_YUY2TOARGBROW_AVX2)
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
#endif
#if defined(HAS_YUY2TOARGBROW_NEON)
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#undef ANY11C
// Any 1 to 1 blended.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \

File diff suppressed because it is too large Load Diff

View File

@ -1350,23 +1350,23 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
"punpcklwd %%xmm0,%%xmm0 \n"
// Convert 8 pixels: 8 UV and 8 Y
#define YUVTORGB(YuvConstants) \
#define YUVTORGB(yuvconstants) \
"movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm3 \n" \
"movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
"pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
"movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
"pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
"psubw %%xmm1,%%xmm0 \n" \
"movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
"pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
"movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
"pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
"psubw %%xmm2,%%xmm1 \n" \
"movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
"pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
"movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
"pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
"psubw %%xmm3,%%xmm2 \n" \
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
"punpcklbw %%xmm3,%%xmm3 \n" \
"pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
"pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \
"paddsw %%xmm3,%%xmm0 \n" \
"paddsw %%xmm3,%%xmm1 \n" \
"paddsw %%xmm3,%%xmm2 \n" \
@ -1423,19 +1423,19 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
"movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
"lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV444
YUVTORGB(YuvConstants)
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1444,25 +1444,25 @@ void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV444
YUVTORGB(YuvConstants)
YUVTORGB(yuvconstants)
STOREABGR
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1471,7 +1471,7 @@ void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
[width]"+rm"(width) // %[width]
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1482,6 +1482,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
@ -1490,7 +1491,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
@ -1514,7 +1515,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
#else
[width]"+rm"(width) // %[width]
#endif
: [kYuvConstants]"r"(&kYuvConstants.kUVToB),
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
: "memory", "cc", NACL_R14
@ -1526,6 +1527,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
@ -1534,7 +1536,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
@ -1558,7 +1560,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
#else
[width]"+rm"(width) // %[width]
#endif
: [kYuvConstants]"r"(&kYuvConstants.kUVToB),
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
[kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
: "memory", "cc", NACL_R14
@ -1566,19 +1568,19 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
);
}
void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(YuvConstants)
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1587,7 +1589,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1597,6 +1599,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1604,7 +1607,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV411
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1613,7 +1616,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1622,13 +1625,14 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READNV12
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1636,30 +1640,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
// Does not use r14.
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READNV12
YUVTORGB(kYuvConstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Does not use r14.
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1669,6 +1650,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1676,7 +1658,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
STOREBGRA
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1685,25 +1667,25 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
STOREABGR
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1712,7 +1694,7 @@ void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1722,6 +1704,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1729,7 +1712,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(yuvconstants)
STORERGBA
"sub $0x8,%[width] \n"
"jg 1b \n"
@ -1738,7 +1721,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1788,6 +1771,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1795,7 +1779,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into BGRA
"vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
@ -1816,29 +1800,29 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_I422TOBGRAROW_AVX2
#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
#if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into ARGB
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
@ -1859,29 +1843,29 @@ void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_I422TOARGBMATRIXROW_AVX2
#endif // HAS_I422TOARGBROW_AVX2
#if defined(HAS_I422TOABGRROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into ABGR
"vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG
@ -1901,7 +1885,7 @@ void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@ -1915,6 +1899,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@ -1922,7 +1907,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into RGBA
"vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
@ -1942,7 +1927,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);

View File

@ -593,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define I422ToTransientMipsRGB \
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@ -652,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
// TODO(fbarchard): read coefficients from yuvconstants (currently unused?).
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@ -671,7 +673,7 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
"1: \n"
I422ToTransientMipsRGB
YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@ -717,6 +719,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@ -732,7 +735,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
"1: \n"
I422ToTransientMipsRGB
YUVTORGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
@ -778,6 +781,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@ -793,7 +797,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
"ori $s6, 0xff \n" // |00|ff|00|ff|
"1: \n"
I422ToTransientMipsRGB
YUVTORGB
// Arranging into bgra format
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|

View File

@ -93,7 +93,7 @@ extern "C" {
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
#define YUV422TORGB_SETUP_REG \
#define YUVTORGB_SETUP \
MEMACCESS([kUVToRB]) \
"vld1.8 {d24}, [%[kUVToRB]] \n" \
MEMACCESS([kUVToG]) \
@ -107,7 +107,7 @@ extern "C" {
MEMACCESS([kYToRgb]) \
"vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
#define YUV422TORGB \
#define YUVTORGB \
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
"vmull.u8 q9, d2, d25 \n" /* u/v G component */\
"vmovl.u8 q0, d0 \n" /* Y */\
@ -138,12 +138,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV444
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@ -154,26 +155,26 @@ void I444ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void I422ToARGBMatrixRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstantsNEON* YuvConstants,
int width) {
void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@ -184,10 +185,10 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
[kUVToG]"r"(&YuvConstants->kUVToG), // %6
[kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
[kYToRgb]"r"(&YuvConstants->kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -197,12 +198,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV411
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@ -213,10 +215,10 @@ void I411ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -226,12 +228,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
"vmov.u8 d19, #255 \n"
@ -243,26 +246,26 @@ void I422ToBGRARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_bgra), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void I422ToABGRMatrixRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
struct YuvConstantsNEON* YuvConstants,
int width) {
void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
"vmov.u8 d23, #255 \n"
@ -274,10 +277,10 @@ void I422ToABGRMatrixRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
[kUVToG]"r"(&YuvConstants->kUVToG), // %6
[kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
[kYToRgb]"r"(&YuvConstants->kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -287,12 +290,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d19, #255 \n"
MEMACCESS(3)
@ -303,10 +307,10 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -316,12 +320,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
MEMACCESS(3)
"vst3.8 {d20, d21, d22}, [%3]! \n"
@ -331,10 +336,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -344,12 +349,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
MEMACCESS(3)
@ -360,10 +366,10 @@ void I422ToRAWRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -385,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@ -401,10 +408,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -429,12 +436,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB1555
@ -446,10 +454,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -468,13 +476,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
YUV422TORGB
YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB4444
@ -486,10 +495,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -499,10 +508,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
uint8* dst_argb,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV400
YUV422TORGB
YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@ -511,10 +520,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&kYuvConstants.kUVToRB),
[kUVToG]"r"(&kYuvConstants.kUVToG),
[kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -545,12 +554,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READNV12
YUV422TORGB
YUVTORGB
"subs %3, %3, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(2)
@ -560,37 +570,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void NV21ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
"1: \n"
READNV21
YUV422TORGB
"subs %3, %3, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(2)
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -599,12 +582,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READNV12
YUV422TORGB
YUVTORGB
"subs %3, %3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@ -614,37 +598,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void NV21ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
"1: \n"
READNV21
YUV422TORGB
"subs %3, %3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
"vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -652,12 +609,13 @@ void NV21ToRGB565Row_NEON(const uint8* src_y,
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUY2
YUV422TORGB
YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@ -666,10 +624,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@ -677,12 +635,13 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READUYVY
YUV422TORGB
YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@ -691,10 +650,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);

View File

@ -91,7 +91,8 @@ extern "C" {
"uzp2 v3.8b, v2.8b, v2.8b \n" \
"ins v1.s[1], v3.s[0] \n"
#define YUV422TORGB_SETUP_REG \
// TODO(fbarchard): load the movi immediates below from struct YuvConstants.
#define YUVTORGB_SETUP \
"ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
"ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
"ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
@ -101,7 +102,7 @@ extern "C" {
"movi v29.8h, #25 \n" \
"movi v30.8h, #52 \n"
#define YUV422TORGB(vR, vG, vB) \
#define YUVTORGB(vR, vG, vB) \
"uxtl v0.8h, v0.8b \n" /* Extract Y */ \
"shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
"ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
@ -143,12 +144,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV444
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@ -159,8 +161,8 @@ void I444ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -173,12 +175,13 @@ void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@ -189,8 +192,8 @@ void I422ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -202,12 +205,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV411
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@ -218,8 +222,8 @@ void I411ToARGBRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -231,12 +235,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v21, v22, v23)
YUVTORGB(v21, v22, v23)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@ -247,8 +252,8 @@ void I422ToBGRARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_bgra), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -261,12 +266,13 @@ void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v20, v21, v22)
YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@ -277,8 +283,8 @@ void I422ToABGRRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -290,12 +296,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v23, v22, v21)
YUVTORGB(v23, v22, v21)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@ -306,8 +313,8 @@ void I422ToRGBARow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -319,12 +326,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@ -334,8 +342,8 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -347,12 +355,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v20, v21, v22)
YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@ -362,8 +371,8 @@ void I422ToRAWRow_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -382,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@ -398,8 +408,8 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -420,12 +430,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV422
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB1555
@ -437,8 +448,8 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -460,13 +471,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB4444
@ -478,8 +490,8 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -492,10 +504,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
int width) {
int64 width64 = (int64)(width);
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUV400
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@ -504,8 +516,8 @@ void I400ToARGBRow_NEON(const uint8* src_y,
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -540,12 +552,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READNV12
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2)
@ -555,51 +568,25 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TOARGBROW_NEON
#ifdef HAS_NV21TOARGBROW_NEON
// Converts a row of NV21 input (src_y plus the interleaved chroma row src_uv)
// to 4-byte-per-pixel output in dst_argb, using AArch64 NEON inline assembly.
// Each pass of the loop produces 8 pixels (32 output bytes).
//
// src_y, src_uv, dst_argb and width are all "+r" operands, so the asm both
// reads and updates them; dst_argb is post-incremented by 32 bytes per pass
// by the st4 store. READNV21, YUV422TORGB_SETUP_REG and YUV422TORGB are macros
// defined outside this function; presumably they load the constants passed
// via the kUVBiasBGR/kYToRgb operands and advance src_y/src_uv -- confirm
// against their definitions.
//
// NOTE(review): the loop body executes before width is tested and always
// handles a full group of 8, so width is expected to be a positive multiple
// of 8 -- confirm that callers guarantee this.
void NV21ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
"1: \n"
READNV21
YUV422TORGB(v22, v21, v20)
// 8 pixels are consumed per pass; the flags set here drive the b.gt below.
"subs %w3, %w3, #8 \n"
// v23 is the fourth channel written by st4 (presumably opaque alpha).
"movi v23.8b, #255 \n"
MEMACCESS(2)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
// Loop while the remaining width is still greater than zero.
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV21TOARGBROW_NEON
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READNV12
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@ -609,51 +596,25 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
#ifdef HAS_NV21TORGB565ROW_NEON
// Converts a row of NV21 input (src_y plus the interleaved chroma row src_uv)
// to 2-byte-per-pixel RGB565 output in dst_rgb565, using AArch64 NEON inline
// assembly. Each pass of the loop produces 8 pixels (16 output bytes).
//
// src_y, src_uv, dst_rgb565 and width are all "+r" operands, so the asm both
// reads and updates them; dst_rgb565 is post-incremented by 16 bytes per pass
// by the st1 store. READNV21, YUV422TORGB_SETUP_REG, YUV422TORGB and
// ARGBTORGB565 are macros defined outside this function; presumably
// ARGBTORGB565 packs the v20-v22 channels into v0 for the store -- confirm
// against its definition.
//
// NOTE(review): the loop body executes before width is tested and always
// handles a full group of 8, so width is expected to be a positive multiple
// of 8 -- confirm that callers guarantee this.
void NV21ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
YUV422TORGB_SETUP_REG
"1: \n"
READNV21
YUV422TORGB(v22, v21, v20)
// 8 pixels are consumed per pass; the flags set here drive the b.gt below.
"subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
"st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
// Loop while the remaining width is still greater than zero.
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV21TORGB565ROW_NEON
#ifdef HAS_YUY2TOARGBROW_NEON
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READYUY2
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@ -662,8 +623,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@ -673,13 +634,14 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
#ifdef HAS_UYVYTOARGBROW_NEON
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
YUV422TORGB_SETUP_REG
YUVTORGB_SETUP
"1: \n"
READUYVY
YUV422TORGB(v22, v21, v20)
YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@ -688,8 +650,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);

View File

@ -83,13 +83,13 @@ extern "C" {
dst_argb += 32;
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
#if defined(HAS_I422TOARGBROW_SSSE3)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@ -102,13 +102,13 @@ void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
}
#endif
#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
#if defined(HAS_I422TOABGRROW_SSSE3)
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@ -1963,16 +1963,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
__asm lea edx, [edx + 64] \
}
#ifdef HAS_I422TOARGBMATRIXROW_AVX2
#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2001,18 +2001,18 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I422TOARGBMATRIXROW_AVX2
#endif // HAS_I422TOARGBROW_AVX2
#ifdef HAS_I444TOARGBMATRIXROW_AVX2
#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void I444ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2040,18 +2040,18 @@ void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I444TOARGBMATRIXROW_AVX2
#endif // HAS_I444TOARGBROW_AVX2
#ifdef HAS_I444TOABGRMATRIXROW_AVX2
#ifdef HAS_I444TOABGRROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
void I444ToABGRRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2079,7 +2079,7 @@ void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I444TOABGRMATRIXROW_AVX2
#endif // HAS_I444TOABGRROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
@ -2089,26 +2089,30 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV411_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
@ -2124,23 +2128,27 @@ __declspec(naked)
void NV12ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // UV
mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width
push ebp
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // UV
mov edx, [esp + 8 + 12] // argb
mov ebp, [esp + 8 + 16] // YuvConstants
mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READNV12_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop esi
vzeroupper
ret
@ -2148,37 +2156,6 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
}
#endif // HAS_NV12TOARGBROW_AVX2
#ifdef HAS_NV21TOARGBROW_AVX2
// 16 pixels.
// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
//
// Declared __declspec(naked): the arguments are fetched from the stack by
// hand and the routine issues its own ret. Each argument offset is written as
// 4 + N because "push esi" has moved esp down by 4 bytes.
//
// The row is read with the macro named READNV12_AVX2 (not a dedicated NV21
// reader); the visible NV21-specific part is that kYvuConstants is passed to
// YUVTORGB_AVX2, which presumably accounts for the swapped V/U byte order --
// confirm against the macro and constant definitions.
//
// NOTE(review): the loop body runs before ecx is tested and always handles 16
// pixels, so width is expected to be a positive multiple of 16 -- confirm
// that callers guarantee this.
__declspec(naked)
void NV21ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // interleaved chroma (uv_buf)
mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READNV12_AVX2
YUVTORGB_AVX2(kYvuConstants)
STOREARGB_AVX2
sub ecx, 16  // 16 pixels handled per pass
jg convertloop  // continue while remaining width > 0
pop esi  // restore the register saved on entry
vzeroupper  // zero the upper halves of the YMM registers before returning
ret
}
}
#endif // HAS_NV21TOARGBROW_AVX2
#ifdef HAS_I422TOBGRAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
@ -2188,26 +2165,30 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(ebp)
STOREBGRA_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
@ -2224,26 +2205,30 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
YUVTORGB_AVX2(ebp)
STORERGBA_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
@ -2256,12 +2241,12 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void I422ToABGRRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2481,12 +2466,12 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2518,12 +2503,12 @@ void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
__declspec(naked)
void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
void I444ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2559,27 +2544,31 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb24
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STORERGB24
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
@ -2593,27 +2582,31 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // raw
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STORERAW
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
@ -2627,15 +2620,18 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb565_buf,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb565
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
psrld xmm5, 27
@ -2647,12 +2643,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop:
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STORERGB565
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
@ -2662,12 +2659,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2704,30 +2701,32 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push ebx
push esi
push edi
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ecx, [esp + 12 + 20] // width
mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READYUV411 // modifies EBX
YUVTORGB(kYuvConstants)
READYUV411
YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
pop ebx
ret
}
}
@ -2738,51 +2737,27 @@ __declspec(naked)
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // UV
mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width
push ebp
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // UV
mov edx, [esp + 8 + 12] // argb
mov ebp, [esp + 8 + 16] // YuvConstants
mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READNV12
YUVTORGB(kYuvConstants)
STOREARGB
sub ecx, 8
jg convertloop
pop esi
ret
}
}
// 8 pixels.
// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // UV
mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READNV12
YUVTORGB(kYvuConstants)
YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
pop ebp
pop esi
ret
}
@ -2793,25 +2768,29 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // bgra
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STOREBGRA
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
@ -2819,12 +2798,12 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
}
__declspec(naked)
void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* YuvConstants,
int width) {
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
@ -2858,31 +2837,34 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgba
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
YUVTORGB(kYuvConstants)
YUVTORGB(ebp)
STORERGBA
sub ecx, 8
jg convertloop
pop ebp
pop edi
pop esi
ret
}
}
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I400TOARGBROW_SSE2