mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 09:16:48 +08:00
ARGBToRGB functions optimized
BUG=none TEST=media_unittest.exe --gunit_catch_exceptions=0 --yuvconverter_repeat=1000 --gunit_filter=LmiVideoFrameTest.ConvertTo*R* Review URL: https://webrtc-codereview.appspot.com/355002 git-svn-id: http://libyuv.googlecode.com/svn/trunk@138 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
782659b07c
commit
9eefb2e8dd
@ -192,6 +192,16 @@ int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb, int dst_stride_rgb,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB to I400.
|
||||
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
|
||||
@ -1633,27 +1633,40 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
void (*FastConvertYUVToRGB24Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTORGB24ROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTORGB24ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToRGB24Row = FastConvertYUVToRGB24Row_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToRGB24Row(src_y, src_u, src_v, dst_argb, width);
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToRGB24Row(row, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1666,37 +1679,50 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Convert I420 to RAW.
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
void (*FastConvertYUVToRAWRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTORAWROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTORAWROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToRAWRow = FastConvertYUVToRAWRow_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRAWRow = ARGBToRAWRow_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToRAWRow(src_y, src_u, src_v, dst_argb, width);
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToRAWRow(row, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1719,27 +1745,40 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRGB565Row = ARGBToRGB565Row_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToRGB565Row(src_y, src_u, src_v, dst_argb, width);
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToRGB565Row(row, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1762,27 +1801,40 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
void (*FastConvertYUVToARGB1555Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGB1555ROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGB1555ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToARGB1555Row = FastConvertYUVToARGB1555Row_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTOARGB1555ROW_SSE2_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToARGB1555Row(src_y, src_u, src_v, dst_argb, width);
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToARGB1555Row(row, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -1792,6 +1844,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to ARGB4444.
|
||||
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
@ -1804,27 +1857,40 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
void (*FastConvertYUVToARGB4444Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGB4444ROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGB4444ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToARGB4444Row = FastConvertYUVToARGB4444Row_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
FastConvertYUVToARGB4444Row(src_y, src_u, src_v, dst_argb, width);
|
||||
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToARGB4444Row(row, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -2119,6 +2185,65 @@ int BG24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB24ROW_SSSE3_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
IS_ALIGNED(dst_rgb24, 16) && IS_ALIGNED(dst_stride_rgb24, 16)) {
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRGB24Row = ARGBToRGB24Row_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
ARGBToRGB24Row(src_argb, dst_rgb24, width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_rgb24 += dst_stride_rgb24;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORAWROW_SSSE3_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
|
||||
IS_ALIGNED(dst_raw, 16) && IS_ALIGNED(dst_stride_raw, 16)) {
|
||||
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRAWRow = ARGBToRAWRow_C;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
ARGBToRAWRow(src_argb, dst_raw, width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_raw += dst_stride_raw;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert NV12 to RGB565.
|
||||
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
@ -2131,25 +2256,37 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
|
||||
dst_stride_rgb = -dst_stride_rgb;
|
||||
}
|
||||
void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
|
||||
void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
IS_ALIGNED(width, 8) &&
|
||||
IS_ALIGNED(dst_rgb, 16) && IS_ALIGNED(dst_stride_rgb, 16)) {
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C;
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
|
||||
}
|
||||
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride]);
|
||||
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
#if defined(HAS_ARGBTORGB565ROW_SSE2_DISABLED)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(width, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
ARGBToRGB565Row = ARGBToRGB565Row_C;
|
||||
}
|
||||
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
|
||||
#if defined(HAS_SPLITUV_NEON)
|
||||
@ -2166,15 +2303,16 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
{
|
||||
SplitUV = SplitUV_C;
|
||||
}
|
||||
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
|
||||
SIMD_ALIGNED(uint8 rowuv[kMaxStride * 2]);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
if ((y & 1) == 0) {
|
||||
// Copy a row of UV.
|
||||
SplitUV(src_uv, row, row + kMaxStride, halfwidth);
|
||||
SplitUV(src_uv, rowuv, rowuv + kMaxStride, halfwidth);
|
||||
src_uv += src_stride_uv;
|
||||
}
|
||||
FastConvertYUVToRGB565Row(src_y, row, row + kMaxStride, dst_rgb, width);
|
||||
FastConvertYUVToARGBRow(src_y, rowuv, rowuv + kMaxStride, row, width);
|
||||
ARGBToRGB565Row(row, dst_rgb, width);
|
||||
dst_rgb += dst_stride_rgb;
|
||||
src_y += src_stride_y;
|
||||
}
|
||||
|
||||
20
source/row.h
20
source/row.h
@ -63,6 +63,12 @@
|
||||
#define HAS_RGB565TOARGBROW_SSE2
|
||||
#define HAS_ARGB1555TOARGBROW_SSE2
|
||||
#define HAS_ARGB4444TOARGBROW_SSE2
|
||||
|
||||
#define HAS_ARGBTORGB24ROW_SSSE3
|
||||
#define HAS_ARGBTORAWROW_SSSE3
|
||||
#define HAS_ARGBTORGB565ROW_SSE2
|
||||
#define HAS_ARGBTOARGB1555ROW_SSE2
|
||||
#define HAS_ARGBTOARGB4444ROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms
|
||||
@ -210,6 +216,20 @@ void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
|
||||
void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
|
||||
void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
|
||||
|
||||
void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
|
||||
// ARGBToABGRRow_C is same as ABGRToARGB
|
||||
// ARGBToBGRARow_C is same as BGRAToARGB
|
||||
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
|
||||
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
|
||||
|
||||
|
||||
@ -85,6 +85,15 @@ static const uvec8 kShuffleMaskBGRAToARGB = {
|
||||
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
||||
};
|
||||
|
||||
// Shuffle table for converting ARGB to RGB24.
// Loaded as the pshufb control in ARGBToRGB24Row_SSSE3. For each 4-byte
// source pixel it selects bytes 0, 1, 2 in order and skips byte 3, so 4 pixels
// (16 bytes) pack into the low 12 bytes of the register. The trailing 128u
// entries have the high bit set, which makes pshufb write zero to the top 4
// bytes.
|
||||
static const uvec8 kShuffleMaskARGBToRGB24 = {
|
||||
0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u };
|
||||
|
||||
|
||||
// Shuffle table for converting ARGB to RAW.
// Loaded as the pshufb control in ARGBToRAWRow_SSSE3. For each 4-byte source
// pixel it selects bytes 2, 1, 0 (the reverse of the RGB24 table) and skips
// byte 3, so 4 pixels (16 bytes) pack into the low 12 bytes of the register.
// The trailing 128u entries have the high bit set, which makes pshufb write
// zero to the top 4 bytes.
|
||||
static const uvec8 kShuffleMaskARGBToRAW = {
|
||||
2u, 1u,0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u };
|
||||
|
||||
__declspec(naked)
|
||||
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
|
||||
__asm {
|
||||
@ -494,6 +503,201 @@ __asm {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
mov ecx, [esp + 12] // pix
|
||||
movdqa xmm5, kShuffleMaskARGBToRGB24
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 16 pixels of argb
|
||||
movdqa xmm1, [eax + 16]
|
||||
movdqa xmm2, [eax + 32]
|
||||
movdqa xmm3, [eax + 48]
|
||||
lea eax, [eax + 64]
|
||||
pshufb xmm0, xmm5 // pack 16 bytes of ARGB to 12 bytes of RGB
|
||||
pshufb xmm1, xmm5
|
||||
pshufb xmm2, xmm5
|
||||
pshufb xmm3, xmm5
|
||||
movdqa xmm4, xmm1
|
||||
psllq xmm4, 12
|
||||
por xmm4, xmm0
|
||||
movdqa [edx], xmm4 // first 16 bytes
|
||||
movdqa xmm4, xmm2
|
||||
psrlq xmm1, 4
|
||||
psllq xmm4, 8
|
||||
por xmm1, xmm4
|
||||
movdqa [edx + 16], xmm1 // middle 16 bytes
|
||||
psrlq xmm2, 8
|
||||
psllq xmm3, 4
|
||||
por xmm2, xmm3
|
||||
movdqa [edx + 32], xmm2 // last 16 bytes
|
||||
lea edx, [edx + 48]
|
||||
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
mov ecx, [esp + 12] // pix
|
||||
movdqa xmm5, kShuffleMaskARGBToRAW
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 16 pixels of argb
|
||||
movdqa xmm1, [eax + 16]
|
||||
movdqa xmm2, [eax + 32]
|
||||
movdqa xmm3, [eax + 48]
|
||||
lea eax, [eax + 64]
|
||||
pshufb xmm0, xmm5 // pack 16 bytes of ARGB to 12 bytes of RGB
|
||||
pshufb xmm1, xmm5
|
||||
pshufb xmm2, xmm5
|
||||
pshufb xmm3, xmm5
|
||||
movdqa xmm4, xmm1
|
||||
psllq xmm4, 12
|
||||
por xmm4, xmm0
|
||||
movdqa [edx], xmm4 // first 16 bytes
|
||||
movdqa xmm4, xmm2
|
||||
psrlq xmm1, 4
|
||||
psllq xmm4, 8
|
||||
por xmm1, xmm4
|
||||
movdqa [edx + 16], xmm1 // middle 16 bytes
|
||||
psrlq xmm2, 8
|
||||
psllq xmm3, 4
|
||||
por xmm2, xmm3
|
||||
movdqa [edx + 32], xmm2 // last 16 bytes
|
||||
lea edx, [edx + 48]
|
||||
sub ecx, 16
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
pcmpeqb xmm3, xmm3 // generate mask 0x001f001f
|
||||
psrlw xmm3, 11
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0
|
||||
psrlw xmm4, 10
|
||||
psllw xmm4, 5
|
||||
pcmpeqb xmm5, xmm5 // generate mask 0xf800f800
|
||||
psrlw xmm5, 11
|
||||
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
mov ecx, [esp + 12] // pix
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0 // B
|
||||
psrlw xmm1, 3
|
||||
pand xmm1, xmm3
|
||||
movdqa xmm2, xmm0 // G
|
||||
psrlw xmm2, 5
|
||||
pand xmm2, xmm4
|
||||
por xmm1, xmm2
|
||||
psrlw xmm0, 8 // R
|
||||
pand xmm0, xmm5
|
||||
por xmm0, xmm1
|
||||
pslld xmm0, 16
|
||||
psrad xmm0, 16
|
||||
packssdw xmm0, xmm0
|
||||
movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
pcmpeqb xmm3, xmm3 // generate mask 0x001f001f
|
||||
psrlw xmm3, 11
|
||||
movdqa xmm4, xmm3 // generate mask 0x03e003e0
|
||||
psllw xmm4, 5
|
||||
movdqa xmm5, xmm3 // generate mask 0x7c007c00
|
||||
psllw xmm5, 10
|
||||
pcmpeqb xmm6, xmm6 // generate mask 0x80008000
|
||||
psrlw xmm6, 15
|
||||
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
mov ecx, [esp + 12] // pix
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0 // B
|
||||
psrlw xmm1, 3
|
||||
pand xmm1, xmm3
|
||||
movdqa xmm2, xmm0 // G
|
||||
psrlw xmm2, 6
|
||||
pand xmm2, xmm4
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0 // R
|
||||
psrlw xmm2, 9
|
||||
pand xmm2, xmm5
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm0 // A
|
||||
psrlw xmm2, 16
|
||||
pand xmm2, xmm6
|
||||
por xmm1, xmm2
|
||||
pslld xmm0, 16
|
||||
psrad xmm0, 16
|
||||
packssdw xmm1, xmm1
|
||||
movq qword ptr [edx], xmm1 // store 4 pixels of ARGB1555
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port to gcc
|
||||
__declspec(naked)
|
||||
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
|
||||
__asm {
|
||||
pcmpeqb xmm4, xmm4 // generate mask 0xf000f000
|
||||
psllw xmm4, 12
|
||||
movdqa xmm3, xmm4 // generate mask 0x00f000f0
|
||||
psrlw xmm3, 8
|
||||
|
||||
mov eax, [esp + 4] // src_argb
|
||||
mov edx, [esp + 8] // dst_rgb
|
||||
mov ecx, [esp + 12] // pix
|
||||
|
||||
convertloop:
|
||||
movdqa xmm0, [eax] // fetch 4 pixels of argb
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm1, xmm0
|
||||
pand xmm0, xmm3 // low nibble
|
||||
pand xmm1, xmm4 // high nibble
|
||||
psrl xmm0, 4
|
||||
psrl xmm1, 8
|
||||
por xmm0, xmm1
|
||||
packuswb xmm0, xmm0
|
||||
movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444
|
||||
lea edx, [edx + 8]
|
||||
sub ecx, 4
|
||||
ja convertloop
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Convert 16 ARGB pixels (64 bytes) to 16 Y values
|
||||
__declspec(naked)
|
||||
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user