mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
yuvconstants for all YUV to RGB conversion functions.

R=harryjin@google.com
BUG=libyuv:488

Review URL: https://codereview.chromium.org/1363503002 .
This commit is contained in:
parent
62c49dc811
commit
f96890a0be
File diff suppressed because it is too large
Load Diff
@ -56,6 +56,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I444ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
@ -103,7 +104,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -124,6 +125,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I444ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_abgr ||
|
||||
@ -171,7 +173,7 @@ int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -192,6 +194,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
@ -248,7 +251,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -269,6 +272,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I411ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
@ -316,7 +320,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -338,6 +342,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
|
||||
ARGBCopyYToAlphaRow_C;
|
||||
@ -436,7 +441,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow(src_a, dst_argb, width);
|
||||
ARGBAttenuateRow(dst_argb, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
@ -462,6 +467,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
|
||||
ARGBCopyYToAlphaRow_C;
|
||||
@ -560,7 +566,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
|
||||
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
@ -639,7 +645,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
|
||||
void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
|
||||
J400ToARGBRow_C;
|
||||
if (!src_y || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -766,7 +772,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
|
||||
RGB24ToARGBRow_C;
|
||||
if (!src_rgb24 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -816,7 +822,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
|
||||
RAWToARGBRow_C;
|
||||
if (!src_raw || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -866,7 +872,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
|
||||
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
|
||||
RGB565ToARGBRow_C;
|
||||
if (!src_rgb565 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -925,7 +931,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int pix) = ARGB1555ToARGBRow_C;
|
||||
int width) = ARGB1555ToARGBRow_C;
|
||||
if (!src_argb1555 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -983,7 +989,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
|
||||
int pix) = ARGB4444ToARGBRow_C;
|
||||
int width) = ARGB4444ToARGBRow_C;
|
||||
if (!src_argb4444 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1044,6 +1050,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1081,7 +1088,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, width);
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1098,10 +1105,11 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV21ToARGBRow)(const uint8* y_buf,
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = NV21ToARGBRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1112,33 +1120,33 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_NV21TOARGBROW_SSSE3)
|
||||
#if defined(HAS_NV12TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
|
||||
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TOARGBROW_AVX2)
|
||||
#if defined(HAS_NV12TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_AVX2;
|
||||
NV12ToARGBRow = NV12ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TOARGBROW_NEON)
|
||||
#if defined(HAS_NV12TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_NEON;
|
||||
NV12ToARGBRow = NV12ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV21ToARGBRow(src_y, src_uv, dst_argb, width);
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1157,6 +1165,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
if (!src_m420 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1194,14 +1203,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
|
||||
&kYuvConstants, width);
|
||||
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
|
||||
dst_argb + dst_stride_argb, width);
|
||||
dst_argb + dst_stride_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb * 2;
|
||||
src_m420 += src_stride_m420 * 3;
|
||||
}
|
||||
if (height & 1) {
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
|
||||
&kYuvConstants, width);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1212,7 +1223,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
|
||||
void (*YUY2ToARGBRow)(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) =
|
||||
YUY2ToARGBRow_C;
|
||||
if (!src_yuy2 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1256,7 +1270,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
YUY2ToARGBRow(src_yuy2, dst_argb, width);
|
||||
YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width);
|
||||
src_yuy2 += src_stride_yuy2;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
@ -1269,7 +1283,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
|
||||
void (*UYVYToARGBRow)(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) =
|
||||
UYVYToARGBRow_C;
|
||||
if (!src_uyvy || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1313,7 +1330,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
UYVYToARGBRow(src_uyvy, dst_argb, width);
|
||||
UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width);
|
||||
src_uyvy += src_stride_uyvy;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
@ -1328,11 +1345,12 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*J422ToARGBRow)(const uint8* y_buf,
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = J422ToARGBRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1343,42 +1361,42 @@ int J420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_J422TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToARGBRow = J422ToARGBRow_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
J422ToARGBRow = J422ToARGBRow_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_NEON)
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToARGBRow = J422ToARGBRow_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1397,11 +1415,12 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*J422ToARGBRow)(const uint8* y_buf,
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = J422ToARGBRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1422,42 +1441,42 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_J422TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToARGBRow = J422ToARGBRow_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
J422ToARGBRow = J422ToARGBRow_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_NEON)
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
J422ToARGBRow = J422ToARGBRow_Any_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToARGBRow = J422ToARGBRow_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -1474,11 +1493,12 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*J422ToABGRRow)(const uint8* y_buf,
|
||||
void (*I422ToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = J422ToABGRRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1489,42 +1509,42 @@ int J420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
|
||||
dst_stride_abgr = -dst_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_J422TOABGRROW_SSSE3)
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToABGRRow = J422ToABGRRow_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_AVX2)
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
J422ToABGRRow = J422ToABGRRow_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_NEON)
|
||||
#if defined(HAS_I422TOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToABGRRow = J422ToABGRRow_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
|
||||
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1543,11 +1563,12 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*J422ToABGRRow)(const uint8* y_buf,
|
||||
void (*I422ToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = J422ToABGRRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1568,42 +1589,42 @@ int J422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
|
||||
}
|
||||
#if defined(HAS_J422TOABGRROW_SSSE3)
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToABGRRow = J422ToABGRRow_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_AVX2)
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
J422ToABGRRow = J422ToABGRRow_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_NEON)
|
||||
#if defined(HAS_I422TOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
J422ToABGRRow = J422ToABGRRow_Any_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
J422ToABGRRow = J422ToABGRRow_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
|
||||
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -1620,11 +1641,12 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*H422ToARGBRow)(const uint8* y_buf,
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = H422ToARGBRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1635,42 +1657,42 @@ int H420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_H422TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToARGBRow = H422ToARGBRow_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
H422ToARGBRow = H422ToARGBRow_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_NEON)
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToARGBRow = H422ToARGBRow_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1689,11 +1711,12 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*H422ToARGBRow)(const uint8* y_buf,
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = H422ToARGBRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1714,42 +1737,42 @@ int H422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_H422TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToARGBRow = H422ToARGBRow_SSSE3;
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
H422ToARGBRow = H422ToARGBRow_AVX2;
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_NEON)
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
H422ToARGBRow = H422ToARGBRow_Any_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToARGBRow = H422ToARGBRow_NEON;
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -1766,11 +1789,12 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*H422ToABGRRow)(const uint8* y_buf,
|
||||
void (*I422ToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = H422ToABGRRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1781,42 +1805,42 @@ int H420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
|
||||
dst_stride_abgr = -dst_stride_abgr;
|
||||
}
|
||||
#if defined(HAS_H422TOABGRROW_SSSE3)
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToABGRRow = H422ToABGRRow_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_AVX2)
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
H422ToABGRRow = H422ToABGRRow_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_NEON)
|
||||
#if defined(HAS_I422TOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToABGRRow = H422ToABGRRow_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
|
||||
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1835,11 +1859,12 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*H422ToABGRRow)(const uint8* y_buf,
|
||||
void (*I422ToABGRRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = H422ToABGRRow_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1860,42 +1885,42 @@ int H422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
|
||||
}
|
||||
#if defined(HAS_H422TOABGRROW_SSSE3)
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToABGRRow = H422ToABGRRow_SSSE3;
|
||||
I422ToABGRRow = I422ToABGRRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_AVX2)
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
H422ToABGRRow = H422ToABGRRow_AVX2;
|
||||
I422ToABGRRow = I422ToABGRRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_NEON)
|
||||
#if defined(HAS_I422TOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
H422ToABGRRow = H422ToABGRRow_Any_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
H422ToABGRRow = H422ToABGRRow_NEON;
|
||||
I422ToABGRRow = I422ToABGRRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
|
||||
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
|
||||
H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
|
||||
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
|
||||
@ -462,6 +462,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -508,7 +509,7 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -531,6 +532,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToBGRARow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_bgra ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -577,7 +579,7 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
|
||||
dst_bgra += dst_stride_bgra;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -600,6 +602,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_abgr ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -637,7 +640,7 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -660,6 +663,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGBARow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgba ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -697,7 +701,7 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
|
||||
dst_rgba += dst_stride_rgba;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -720,6 +724,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGB24Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -757,7 +762,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
|
||||
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width);
|
||||
dst_rgb24 += dst_stride_rgb24;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -780,6 +785,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRAWRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_raw ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -817,7 +823,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
|
||||
I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width);
|
||||
dst_raw += dst_stride_raw;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -840,6 +846,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGB1555Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -877,7 +884,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
|
||||
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width);
|
||||
dst_argb1555 += dst_stride_argb1555;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -901,6 +908,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGB4444Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -938,7 +946,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
|
||||
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width);
|
||||
dst_argb4444 += dst_stride_argb4444;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -961,6 +969,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGB565Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -998,7 +1007,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
|
||||
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width);
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1029,6 +1038,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
|
||||
@ -1105,7 +1115,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
// Allocate a row of argb.
|
||||
align_buffer_64(row_argb, width * 4);
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
|
||||
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width);
|
||||
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
|
||||
@ -287,9 +287,9 @@ int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*YUY2ToUV422Row)(const uint8* src_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int pix) =
|
||||
uint8* dst_u, uint8* dst_v, int width) =
|
||||
YUY2ToUV422Row_C;
|
||||
void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
|
||||
void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
|
||||
YUY2ToYRow_C;
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
@ -359,10 +359,10 @@ int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*UYVYToUV422Row)(const uint8* src_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix) =
|
||||
uint8* dst_u, uint8* dst_v, int width) =
|
||||
UYVYToUV422Row_C;
|
||||
void (*UYVYToYRow)(const uint8* src_uyvy,
|
||||
uint8* dst_y, int pix) = UYVYToYRow_C;
|
||||
uint8* dst_y, int width) = UYVYToYRow_C;
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
@ -790,6 +790,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToBGRARow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_bgra ||
|
||||
@ -846,7 +847,7 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
|
||||
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
|
||||
dst_bgra += dst_stride_bgra;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -867,6 +868,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToABGRRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_abgr ||
|
||||
@ -914,7 +916,7 @@ int I422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
|
||||
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
|
||||
dst_abgr += dst_stride_abgr;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -935,6 +937,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGBARow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_rgba ||
|
||||
@ -982,7 +985,7 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
|
||||
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
|
||||
dst_rgba += dst_stride_rgba;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
@ -1001,6 +1004,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
void (*NV12ToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToRGB565Row_C;
|
||||
if (!src_y || !src_uv || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
@ -1038,7 +1042,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
|
||||
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width);
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1055,10 +1059,11 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV21ToRGB565Row)(const uint8* y_buf,
|
||||
void (*NV12ToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* src_vu,
|
||||
uint8* rgb_buf,
|
||||
int width) = NV21ToRGB565Row_C;
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) = NV12ToRGB565Row_C;
|
||||
if (!src_y || !src_vu || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1069,33 +1074,33 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
|
||||
dst_stride_rgb565 = -dst_stride_rgb565;
|
||||
}
|
||||
#if defined(HAS_NV21TORGB565ROW_SSSE3)
|
||||
#if defined(HAS_NV12TORGB565ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TORGB565ROW_AVX2)
|
||||
#if defined(HAS_NV12TORGB565ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TORGB565ROW_NEON)
|
||||
#if defined(HAS_NV12TORGB565ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToRGB565Row = NV21ToRGB565Row_NEON;
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
|
||||
NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width);
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1110,7 +1115,7 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
|
||||
int width, int height,
|
||||
uint32 value) {
|
||||
int y;
|
||||
void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
|
||||
void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||
@ -1186,7 +1191,7 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height,
|
||||
uint32 value) {
|
||||
int y;
|
||||
void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
|
||||
void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
|
||||
if (!dst_argb ||
|
||||
width <= 0 || height == 0 ||
|
||||
dst_x < 0 || dst_y < 0) {
|
||||
@ -1909,7 +1914,7 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
|
||||
const uint8* shuffler, int width, int height) {
|
||||
int y;
|
||||
void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
|
||||
const uint8* shuffler, int pix) = ARGBShuffleRow_C;
|
||||
const uint8* shuffler, int width) = ARGBShuffleRow_C;
|
||||
if (!src_bgra || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
@ -1976,7 +1981,7 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst, int width)) {
|
||||
int y;
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
|
||||
ARGBToYJRow_C;
|
||||
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
||||
uint8* dst_sobely, int width) = SobelYRow_C;
|
||||
@ -2360,8 +2365,8 @@ int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
int width, int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
|
||||
SplitUVRow_C;
|
||||
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) = SplitUVRow_C;
|
||||
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
@ -2464,8 +2469,8 @@ int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
int width, int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
|
||||
SplitUVRow_C;
|
||||
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) = SplitUVRow_C;
|
||||
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
|
||||
@ -40,103 +40,9 @@ extern "C" {
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
|
||||
ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
|
||||
#ifdef HAS_I422TOYUY2ROW_SSE2
|
||||
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
|
||||
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I444TOABGRROW_SSSE3
|
||||
ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORAWROW_AVX2
|
||||
ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_J422TOARGBROW_AVX2
|
||||
ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_J422TOABGRROW_AVX2
|
||||
ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_H422TOARGBROW_AVX2
|
||||
ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_H422TOABGRROW_AVX2
|
||||
ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOABGRROW_AVX2
|
||||
ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOABGRROW_AVX2
|
||||
ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB4444ROW_AVX2
|
||||
ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB1555ROW_AVX2
|
||||
ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB565ROW_AVX2
|
||||
ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
|
||||
ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_J422TOARGBROW_NEON
|
||||
ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_J422TOABGRROW_NEON
|
||||
ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_H422TOARGBROW_NEON
|
||||
ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_H422TOABGRROW_NEON
|
||||
ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOYUY2ROW_NEON
|
||||
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
|
||||
@ -144,7 +50,97 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
|
||||
#ifdef HAS_I422TOUYVYROW_NEON
|
||||
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
|
||||
#endif
|
||||
#undef ANY31
|
||||
#undef ANY31C
|
||||
|
||||
// Any 3 planes to 1 with yuvconstants.
// Expands to a function NAMEANY that wraps the row function ANY_SIMD so it
// can be called with a width that is not a multiple of MASK + 1:
// - The leading n = width & ~MASK part of the row is converted straight from
//   the caller's buffers (this call is skipped when n is 0).
// - The trailing r = width & MASK part is copied into a scratch buffer with
//   Y at temp, U at temp + 64 and V at temp + 128 (these three 64 byte
//   regions are zeroed first). ANY_SIMD then converts MASK + 1 of width from
//   the scratch buffer into temp + 192, and SS(r, DUVSHIFT) * BPP bytes of
//   that result are copied to dst_ptr. This scratch pass is not guarded and
//   runs for every call.
// yuvconstants is forwarded unchanged to both ANY_SIMD calls.
// UVSHIFT shifts the source offset/count for the U and V planes, DUVSHIFT
// shifts the destination offset/count, and BPP scales the destination
// offset and copy size.
// NOTE(review): SS() is defined outside this hunk; it looks like a shifted
// count for the leftover part - confirm its rounding before relying on it.
|
||||
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
uint8* dst_ptr, struct YuvConstants* yuvconstants, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
|
||||
yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
|
||||
ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
|
||||
ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I444TOABGRROW_SSSE3
|
||||
ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB24ROW_AVX2
|
||||
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORAWROW_AVX2
|
||||
ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGBAROW_AVX2
|
||||
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOABGRROW_AVX2
|
||||
ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I444TOABGRROW_AVX2
|
||||
ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB4444ROW_AVX2
|
||||
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGB1555ROW_AVX2
|
||||
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TORGB565ROW_AVX2
|
||||
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_I422TOARGBROW_NEON
|
||||
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
|
||||
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
|
||||
ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
|
||||
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
|
||||
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
|
||||
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
|
||||
#endif
|
||||
#undef ANY31C
|
||||
|
||||
// Any 2 planes to 1.
|
||||
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
@ -164,32 +160,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
// Biplanar to RGB.
|
||||
#ifdef HAS_NV12TOARGBROW_SSSE3
|
||||
ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_AVX2
|
||||
ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_SSSE3
|
||||
ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_AVX2
|
||||
ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
|
||||
// Merge functions.
|
||||
#ifdef HAS_MERGEUVROW_SSE2
|
||||
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
|
||||
@ -249,6 +219,46 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
|
||||
#endif
|
||||
#undef ANY21
|
||||
|
||||
// Any 2 planes to 1 with yuvconstants.
// Expands to a function NAMEANY that wraps the row function ANY_SIMD so it
// can be called with a width that is not a multiple of MASK + 1:
// - The leading n = width & ~MASK part of the row is converted straight from
//   the caller's buffers (this call is skipped when n is 0).
// - The trailing r = width & MASK part is copied into a scratch buffer with
//   the first plane at temp and the second (uv) plane at temp + 64 (these
//   two 64 byte regions are zeroed first). ANY_SIMD then converts MASK + 1
//   of width from the scratch buffer into temp + 128, and r * BPP bytes of
//   that result are copied to dst_ptr + n * BPP. This scratch pass is not
//   guarded and runs for every call.
// yuvconstants is forwarded unchanged to both ANY_SIMD calls.
// SBPP scales the offset and copy size for y_buf, SBPP2 does the same for
// uv_buf after the UVSHIFT shift, and BPP scales the destination offset and
// copy size.
// NOTE(review): SS() is defined outside this hunk; it looks like a shifted
// count for the leftover part - confirm its rounding before relying on it.
|
||||
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
|
||||
uint8* dst_ptr, struct YuvConstants* yuvconstants, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
|
||||
SS(r, UVSHIFT) * SBPP2); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
// Biplanar to RGB.
|
||||
#ifdef HAS_NV12TOARGBROW_SSSE3
|
||||
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_AVX2
|
||||
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_NEON
|
||||
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_SSSE3
|
||||
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_AVX2
|
||||
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#undef ANY21C
|
||||
|
||||
// Any 1 to 1.
|
||||
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
@ -297,9 +307,7 @@ ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7)
|
||||
#if defined(HAS_I400TOARGBROW_AVX2)
|
||||
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
#if defined(HAS_RGB24TOARGBROW_SSSE3)
|
||||
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
|
||||
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
|
||||
@ -315,10 +323,6 @@ ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
|
||||
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
||||
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_AVX2)
|
||||
ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB24ROW_NEON)
|
||||
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
|
||||
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
|
||||
@ -327,8 +331,6 @@ ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
|
||||
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
|
||||
ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
|
||||
ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYROW_AVX2
|
||||
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
|
||||
@ -426,6 +428,35 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
|
||||
#endif
|
||||
#undef ANY11
|
||||
|
||||
// Any 1 to 1 with yuvconstants
// Generates NAMEANY(src_ptr, dst_ptr, yuvconstants, width): a wrapper that lets
// the row function ANY_SIMD be used with any width.
//   - n = width & ~MASK pixels are passed straight to ANY_SIMD (only if n > 0).
//   - The remaining r = width & MASK pixels are copied into the first half of a
//     256-byte aligned scratch buffer, converted as one block of MASK + 1
//     pixels into the second half (temp + 128), and then only r * BPP bytes of
//     that result are copied to dst_ptr + n * BPP.
// The first 128 bytes of temp are zeroed up front, so the bytes past the copied
// remainder have defined contents when ANY_SIMD reads them.
// SBPP and BPP scale the source and destination byte offsets; UVSHIFT is
// applied to the source offset (n >> UVSHIFT) and, through SS(), to the
// remainder count. SS is defined outside this macro.
// yuvconstants is forwarded unchanged to both ANY_SIMD calls.
// Note that the scratch conversion is executed even when r is 0.
|
||||
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
// YUY2 and UYVY to ARGB wrappers built from ANY11C. After the wrapper name and
// the SIMD row function, the arguments are UVSHIFT = 1, SBPP = 4, BPP = 4 and
// MASK. MASK + 1 is the pixel count handed to the SIMD function for the
// remainder block: 16 for SSSE3, 32 for AVX2 and 8 for NEON.
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_AVX2)
|
||||
ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
|
||||
#endif
|
||||
#if defined(HAS_YUY2TOARGBROW_NEON)
|
||||
ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
|
||||
ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
|
||||
#endif
|
||||
#undef ANY11C
|
||||
|
||||
// Any 1 to 1 blended.
|
||||
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
|
||||
1095
source/row_common.cc
1095
source/row_common.cc
File diff suppressed because it is too large
Load Diff
@ -1350,23 +1350,23 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
"punpcklwd %%xmm0,%%xmm0 \n"
|
||||
|
||||
// Convert 8 pixels: 8 UV and 8 Y
|
||||
#define YUVTORGB(YuvConstants) \
|
||||
#define YUVTORGB(yuvconstants) \
|
||||
"movdqa %%xmm0,%%xmm1 \n" \
|
||||
"movdqa %%xmm0,%%xmm2 \n" \
|
||||
"movdqa %%xmm0,%%xmm3 \n" \
|
||||
"movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
|
||||
"pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
|
||||
"movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
|
||||
"pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
|
||||
"psubw %%xmm1,%%xmm0 \n" \
|
||||
"movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
|
||||
"pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
|
||||
"movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
|
||||
"pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
|
||||
"psubw %%xmm2,%%xmm1 \n" \
|
||||
"movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
|
||||
"pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
|
||||
"movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
|
||||
"pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
|
||||
"psubw %%xmm3,%%xmm2 \n" \
|
||||
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
||||
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
|
||||
"punpcklbw %%xmm3,%%xmm3 \n" \
|
||||
"pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
|
||||
"pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \
|
||||
"paddsw %%xmm3,%%xmm0 \n" \
|
||||
"paddsw %%xmm3,%%xmm1 \n" \
|
||||
"paddsw %%xmm3,%%xmm2 \n" \
|
||||
@ -1423,19 +1423,19 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
||||
"movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
|
||||
"lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
|
||||
|
||||
void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUVTORGB(YuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1444,25 +1444,25 @@ void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUVTORGB(YuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREABGR
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1471,7 +1471,7 @@ void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1482,6 +1482,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_rgb24,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
||||
@ -1490,7 +1491,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpcklbw %%xmm2,%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
@ -1514,7 +1515,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB),
|
||||
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
|
||||
[kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
|
||||
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
|
||||
: "memory", "cc", NACL_R14
|
||||
@ -1526,6 +1527,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_raw,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
|
||||
@ -1534,7 +1536,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpcklbw %%xmm2,%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
@ -1558,7 +1560,7 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
#else
|
||||
[width]"+rm"(width) // %[width]
|
||||
#endif
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB),
|
||||
: [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
|
||||
[kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
|
||||
[kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
|
||||
: "memory", "cc", NACL_R14
|
||||
@ -1566,19 +1568,19 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(YuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1587,7 +1589,7 @@ void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1597,6 +1599,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
@ -1604,7 +1607,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1613,7 +1616,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1622,13 +1625,14 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1636,30 +1640,7 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
[uv_buf]"+r"(uv_buf), // %[uv_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
// Does not use r14.
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUVTORGB(kYuvConstants)
|
||||
STOREARGB
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[uv_buf]"+r"(uv_buf), // %[uv_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
// Does not use r14.
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1669,6 +1650,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
@ -1676,7 +1658,7 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREBGRA
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1685,25 +1667,25 @@ void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
|
||||
void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STOREABGR
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1712,7 +1694,7 @@ void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1722,6 +1704,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_rgba,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
@ -1729,7 +1712,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(yuvconstants)
|
||||
STORERGBA
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
@ -1738,7 +1721,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1788,6 +1771,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
@ -1795,7 +1779,7 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
|
||||
// Step 3: Weave into BGRA
|
||||
"vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
|
||||
@ -1816,29 +1800,29 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422TOBGRAROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
|
||||
// Step 3: Weave into ARGB
|
||||
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
|
||||
@ -1859,29 +1843,29 @@ void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
}
|
||||
#endif // HAS_I422TOARGBMATRIXROW_AVX2
|
||||
#endif // HAS_I422TOARGBROW_AVX2
|
||||
|
||||
#if defined(HAS_I422TOABGRROW_AVX2)
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
||||
void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
|
||||
// Step 3: Weave into ABGR
|
||||
"vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG
|
||||
@ -1901,7 +1885,7 @@ void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
@ -1915,6 +1899,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
@ -1922,7 +1907,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(yuvconstants)
|
||||
|
||||
// Step 3: Weave into RGBA
|
||||
"vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
|
||||
@ -1942,7 +1927,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", NACL_R14
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
|
||||
@ -593,7 +593,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
// t8 = | 0 | G1 | 0 | g1 |
|
||||
// t2 = | 0 | R0 | 0 | r0 |
|
||||
// t1 = | 0 | R1 | 0 | r1 |
|
||||
#define I422ToTransientMipsRGB \
|
||||
#define YUVTORGB \
|
||||
"lw $t0, 0(%[y_buf]) \n" \
|
||||
"lhu $t1, 0(%[u_buf]) \n" \
|
||||
"lhu $t2, 0(%[v_buf]) \n" \
|
||||
@ -652,10 +652,12 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
"addu.ph $t2, $t2, $s5 \n" \
|
||||
"addu.ph $t1, $t1, $s5 \n"
|
||||
|
||||
// TODO(fbarchard): accept yuv conversion constants.
|
||||
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
@ -671,7 +673,7 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
|
||||
|
||||
"1: \n"
|
||||
I422ToTransientMipsRGB
|
||||
YUVTORGB
|
||||
// Arranging into argb format
|
||||
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
|
||||
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
|
||||
@ -717,6 +719,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
@ -732,7 +735,7 @@ void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
|
||||
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
|
||||
|
||||
"1: \n"
|
||||
I422ToTransientMipsRGB
|
||||
YUVTORGB
|
||||
// Arranging into abgr format
|
||||
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
|
||||
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
|
||||
@ -778,6 +781,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
@ -793,7 +797,7 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
|
||||
"ori $s6, 0xff \n" // |00|ff|00|ff|
|
||||
|
||||
"1: \n"
|
||||
I422ToTransientMipsRGB
|
||||
YUVTORGB
|
||||
// Arranging into bgra format
|
||||
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
|
||||
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
|
||||
|
||||
@ -93,7 +93,7 @@ extern "C" {
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
#define YUV422TORGB_SETUP_REG \
|
||||
#define YUVTORGB_SETUP \
|
||||
MEMACCESS([kUVToRB]) \
|
||||
"vld1.8 {d24}, [%[kUVToRB]] \n" \
|
||||
MEMACCESS([kUVToG]) \
|
||||
@ -107,7 +107,7 @@ extern "C" {
|
||||
MEMACCESS([kYToRgb]) \
|
||||
"vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
|
||||
|
||||
#define YUV422TORGB \
|
||||
#define YUVTORGB \
|
||||
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
|
||||
"vmull.u8 q9, d2, d25 \n" /* u/v G component */\
|
||||
"vmovl.u8 q0, d0 \n" /* Y */\
|
||||
@ -138,12 +138,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(3)
|
||||
@ -154,26 +155,26 @@ void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void I422ToARGBMatrixRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstantsNEON* YuvConstants,
|
||||
int width) {
|
||||
void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(3)
|
||||
@ -184,10 +185,10 @@ void I422ToARGBMatrixRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
|
||||
[kUVToG]"r"(&YuvConstants->kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&YuvConstants->kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -197,12 +198,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(3)
|
||||
@ -213,10 +215,10 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -226,12 +228,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vswp.u8 d20, d22 \n"
|
||||
"vmov.u8 d19, #255 \n"
|
||||
@ -243,26 +246,26 @@ void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_bgra), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void I422ToABGRMatrixRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstantsNEON* YuvConstants,
|
||||
int width) {
|
||||
void I422ToABGRRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vswp.u8 d20, d22 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
@ -274,10 +277,10 @@ void I422ToABGRMatrixRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_abgr), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
|
||||
[kUVToG]"r"(&YuvConstants->kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&YuvConstants->kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -287,12 +290,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgba,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d19, #255 \n"
|
||||
MEMACCESS(3)
|
||||
@ -303,10 +307,10 @@ void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgba), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -316,12 +320,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb24,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"vst3.8 {d20, d21, d22}, [%3]! \n"
|
||||
@ -331,10 +336,10 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgb24), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -344,12 +349,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vswp.u8 d20, d22 \n"
|
||||
MEMACCESS(3)
|
||||
@ -360,10 +366,10 @@ void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_raw), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -385,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(3)
|
||||
@ -401,10 +408,10 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgb565), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -429,12 +436,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb1555,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
ARGBTOARGB1555
|
||||
@ -446,10 +454,10 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb1555), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -468,13 +476,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb4444,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %4, %4, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
ARGBTOARGB4444
|
||||
@ -486,10 +495,10 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb4444), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -499,10 +508,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV400
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %2, %2, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -511,10 +520,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&kYuvConstants.kUVToRB),
|
||||
[kUVToG]"r"(&kYuvConstants.kUVToG),
|
||||
[kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -545,12 +554,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
|
||||
void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %3, %3, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(2)
|
||||
@ -560,37 +570,10 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void NV21ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUV422TORGB
|
||||
"subs %3, %3, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d20, d21, d22, d23}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -599,12 +582,13 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
|
||||
void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %3, %3, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(2)
|
||||
@ -614,37 +598,10 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void NV21ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUV422TORGB
|
||||
"subs %3, %3, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -652,12 +609,13 @@ void NV21ToRGB565Row_NEON(const uint8* src_y,
|
||||
|
||||
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUY2
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %2, %2, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -666,10 +624,10 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
: "+r"(src_yuy2), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
@ -677,12 +635,13 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
|
||||
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READUYVY
|
||||
YUV422TORGB
|
||||
YUVTORGB
|
||||
"subs %2, %2, #8 \n"
|
||||
"vmov.u8 d23, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -691,10 +650,10 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
|
||||
[kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
|
||||
[kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
|
||||
[kUVToG]"r"(&yuvconstants->kUVToG),
|
||||
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
|
||||
@ -91,7 +91,8 @@ extern "C" {
|
||||
"uzp2 v3.8b, v2.8b, v2.8b \n" \
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
|
||||
#define YUV422TORGB_SETUP_REG \
|
||||
// TODO(fbarchard): replace movi with constants from struct.
|
||||
#define YUVTORGB_SETUP \
|
||||
"ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
|
||||
@ -101,7 +102,7 @@ extern "C" {
|
||||
"movi v29.8h, #25 \n" \
|
||||
"movi v30.8h, #52 \n"
|
||||
|
||||
#define YUV422TORGB(vR, vG, vB) \
|
||||
#define YUVTORGB(vR, vG, vB) \
|
||||
"uxtl v0.8h, v0.8b \n" /* Extract Y */ \
|
||||
"shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
|
||||
"ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
|
||||
@ -143,12 +144,13 @@ void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV444
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -159,8 +161,8 @@ void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
|
||||
[kYToRgb]"r"(&yuvconstants->kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -173,12 +175,13 @@ void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -189,8 +192,8 @@ void I422ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -202,12 +205,13 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV411
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -218,8 +222,8 @@ void I411ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -231,12 +235,13 @@ void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_bgra,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v21, v22, v23)
|
||||
YUVTORGB(v21, v22, v23)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v20.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -247,8 +252,8 @@ void I422ToBGRARow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_bgra), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -261,12 +266,13 @@ void I422ToABGRRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v20, v21, v22)
|
||||
YUVTORGB(v20, v21, v22)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -277,8 +283,8 @@ void I422ToABGRRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_abgr), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -290,12 +296,13 @@ void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgba,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v23, v22, v21)
|
||||
YUVTORGB(v23, v22, v21)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v20.8b, #255 \n" /* A */
|
||||
MEMACCESS(3)
|
||||
@ -306,8 +313,8 @@ void I422ToRGBARow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgba), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -319,12 +326,13 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb24,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
|
||||
@ -334,8 +342,8 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgb24), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -347,12 +355,13 @@ void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_raw,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v20, v21, v22)
|
||||
YUVTORGB(v20, v21, v22)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
MEMACCESS(3)
|
||||
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
|
||||
@ -362,8 +371,8 @@ void I422ToRAWRow_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_raw), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -382,12 +391,13 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(3)
|
||||
@ -398,8 +408,8 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_rgb565), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -420,12 +430,13 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb1555,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
ARGBTOARGB1555
|
||||
@ -437,8 +448,8 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb1555), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -460,13 +471,14 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb4444,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
|
||||
"1: \n"
|
||||
READYUV422
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w4, %w4, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
ARGBTOARGB4444
|
||||
@ -478,8 +490,8 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
"+r"(src_v), // %2
|
||||
"+r"(dst_argb4444), // %3
|
||||
"+r"(width) // %4
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -492,10 +504,10 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
int width) {
|
||||
int64 width64 = (int64)(width);
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUV400
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w2, %w2, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -504,8 +516,8 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width64) // %2
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -540,12 +552,13 @@ void J400ToARGBRow_NEON(const uint8* src_y,
|
||||
void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
MEMACCESS(2)
|
||||
@ -555,51 +568,25 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
}
|
||||
#endif // HAS_NV12TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_NV21TOARGBROW_NEON
|
||||
void NV21ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
MEMACCESS(2)
|
||||
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_argb), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
}
|
||||
#endif // HAS_NV21TOARGBROW_NEON
|
||||
|
||||
#ifdef HAS_NV12TORGB565ROW_NEON
|
||||
void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READNV12
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w3, %w3, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(2)
|
||||
@ -609,51 +596,25 @@ void NV12ToRGB565Row_NEON(const uint8* src_y,
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
}
|
||||
#endif // HAS_NV12TORGB565ROW_NEON
|
||||
|
||||
#ifdef HAS_NV21TORGB565ROW_NEON
|
||||
void NV21ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_uv,
|
||||
uint8* dst_rgb565,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
"1: \n"
|
||||
READNV21
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
"subs %w3, %w3, #8 \n"
|
||||
ARGBTORGB565
|
||||
MEMACCESS(2)
|
||||
"st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(src_uv), // %1
|
||||
"+r"(dst_rgb565), // %2
|
||||
"+r"(width) // %3
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
}
|
||||
#endif // HAS_NV21TORGB565ROW_NEON
|
||||
|
||||
#ifdef HAS_YUY2TOARGBROW_NEON
|
||||
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int64 width64 = (int64)(width);
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READYUY2
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w2, %w2, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -662,8 +623,8 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
: "+r"(src_yuy2), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width64) // %2
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
@ -673,13 +634,14 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
|
||||
#ifdef HAS_UYVYTOARGBROW_NEON
|
||||
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int64 width64 = (int64)(width);
|
||||
asm volatile (
|
||||
YUV422TORGB_SETUP_REG
|
||||
YUVTORGB_SETUP
|
||||
"1: \n"
|
||||
READUYVY
|
||||
YUV422TORGB(v22, v21, v20)
|
||||
YUVTORGB(v22, v21, v20)
|
||||
"subs %w2, %w2, #8 \n"
|
||||
"movi v23.8b, #255 \n"
|
||||
MEMACCESS(1)
|
||||
@ -688,8 +650,8 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
: "+r"(src_uyvy), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width64) // %2
|
||||
: [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
|
||||
: [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
|
||||
[kYToRgb]"r"(&kYuvConstants.kYToRgb)
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
|
||||
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
|
||||
);
|
||||
|
||||
@ -83,13 +83,13 @@ extern "C" {
|
||||
dst_argb += 32;
|
||||
|
||||
|
||||
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
|
||||
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
@ -102,13 +102,13 @@ void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
|
||||
void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
#if defined(HAS_I422TOABGRROW_SSSE3)
|
||||
void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
@ -1963,16 +1963,16 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
__asm lea edx, [edx + 64] \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBMATRIXROW_AVX2
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2001,18 +2001,18 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I422TOARGBMATRIXROW_AVX2
|
||||
#endif // HAS_I422TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I444TOARGBMATRIXROW_AVX2
|
||||
#ifdef HAS_I444TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I444ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2040,18 +2040,18 @@ void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I444TOARGBMATRIXROW_AVX2
|
||||
#endif // HAS_I444TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I444TOABGRMATRIXROW_AVX2
|
||||
#ifdef HAS_I444TOABGRROW_AVX2
|
||||
// 16 pixels
|
||||
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
|
||||
__declspec(naked)
|
||||
void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I444ToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2079,7 +2079,7 @@ void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_I444TOABGRMATRIXROW_AVX2
|
||||
#endif // HAS_I444TOABGRROW_AVX2
|
||||
|
||||
#ifdef HAS_I411TOARGBROW_AVX2
|
||||
// 16 pixels
|
||||
@ -2089,26 +2089,30 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV411_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(ebp)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
@ -2124,23 +2128,27 @@ __declspec(naked)
|
||||
void NV12ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
mov eax, [esp + 4 + 4] // Y
|
||||
mov esi, [esp + 4 + 8] // UV
|
||||
mov edx, [esp + 4 + 12] // argb
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
push ebp
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // UV
|
||||
mov edx, [esp + 8 + 12] // argb
|
||||
mov ebp, [esp + 8 + 16] // YuvConstants
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV12_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(ebp)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
@ -2148,37 +2156,6 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
|
||||
}
|
||||
#endif // HAS_NV12TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_NV21TOARGBROW_AVX2
|
||||
// 16 pixels.
|
||||
// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked)
|
||||
void NV21ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
mov eax, [esp + 4 + 4] // Y
|
||||
mov esi, [esp + 4 + 8] // UV
|
||||
mov edx, [esp + 4 + 12] // argb
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV12_AVX2
|
||||
YUVTORGB_AVX2(kYvuConstants)
|
||||
STOREARGB_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop esi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_NV21TOARGBROW_AVX2
|
||||
|
||||
#ifdef HAS_I422TOBGRAROW_AVX2
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
|
||||
@ -2188,26 +2165,30 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(ebp)
|
||||
STOREBGRA_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
@ -2224,26 +2205,30 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // argb
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV422_AVX2
|
||||
YUVTORGB_AVX2(kYuvConstants)
|
||||
YUVTORGB_AVX2(ebp)
|
||||
STORERGBA_AVX2
|
||||
|
||||
sub ecx, 16
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
vzeroupper
|
||||
@ -2256,12 +2241,12 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
||||
// 16 pixels
|
||||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I422ToABGRRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2481,12 +2466,12 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
||||
// 8 pixels.
|
||||
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2518,12 +2503,12 @@ void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
// 8 pixels.
|
||||
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
|
||||
__declspec(naked)
|
||||
void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I444ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2559,27 +2544,31 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_rgb24,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // rgb24
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
|
||||
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(ebp)
|
||||
STORERGB24
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
@ -2593,27 +2582,31 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_raw,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // raw
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
|
||||
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(ebp)
|
||||
STORERAW
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
@ -2627,15 +2620,18 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb565_buf,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // rgb565
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
|
||||
psrld xmm5, 27
|
||||
@ -2647,12 +2643,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(ebp)
|
||||
STORERGB565
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
@ -2662,12 +2659,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
||||
// 8 pixels.
|
||||
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2704,30 +2701,32 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ecx, [esp + 12 + 20] // width
|
||||
mov edx, [esp + 12 + 16] // abgr
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READYUV411 // modifies EBX
|
||||
YUVTORGB(kYuvConstants)
|
||||
READYUV411
|
||||
YUVTORGB(ebp)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
@ -2738,51 +2737,27 @@ __declspec(naked)
|
||||
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
mov eax, [esp + 4 + 4] // Y
|
||||
mov esi, [esp + 4 + 8] // UV
|
||||
mov edx, [esp + 4 + 12] // argb
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
push ebp
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // UV
|
||||
mov edx, [esp + 8 + 12] // argb
|
||||
mov ebp, [esp + 8 + 16] // YuvConstants
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV12
|
||||
YUVTORGB(kYuvConstants)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// 8 pixels.
|
||||
// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
|
||||
__declspec(naked)
|
||||
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
mov eax, [esp + 4 + 4] // Y
|
||||
mov esi, [esp + 4 + 8] // UV
|
||||
mov edx, [esp + 4 + 12] // argb
|
||||
mov ecx, [esp + 4 + 16] // width
|
||||
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
||||
|
||||
convertloop:
|
||||
READNV12
|
||||
YUVTORGB(kYvuConstants)
|
||||
YUVTORGB(ebp)
|
||||
STOREARGB
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
@ -2793,25 +2768,29 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_bgra,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // bgra
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(ebp)
|
||||
STOREBGRA
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
@ -2819,12 +2798,12 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* YuvConstants,
|
||||
int width) {
|
||||
void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_abgr,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
@ -2858,31 +2837,34 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_rgba,
|
||||
struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
mov eax, [esp + 8 + 4] // Y
|
||||
mov esi, [esp + 8 + 8] // U
|
||||
mov edi, [esp + 8 + 12] // V
|
||||
mov edx, [esp + 8 + 16] // rgba
|
||||
mov ecx, [esp + 8 + 20] // width
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // Y
|
||||
mov esi, [esp + 12 + 8] // U
|
||||
mov edi, [esp + 12 + 12] // V
|
||||
mov edx, [esp + 12 + 16] // argb
|
||||
mov ebp, [esp + 12 + 20] // YuvConstants
|
||||
mov ecx, [esp + 12 + 24] // width
|
||||
sub edi, esi
|
||||
|
||||
convertloop:
|
||||
READYUV422
|
||||
YUVTORGB(kYuvConstants)
|
||||
YUVTORGB(ebp)
|
||||
STORERGBA
|
||||
|
||||
sub ecx, 8
|
||||
jg convertloop
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HAS_I422TOARGBROW_SSSE3
|
||||
|
||||
#ifdef HAS_I400TOARGBROW_SSE2
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user