mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
YUY2ToARGB and UYVYToARGB use specialized row functions that do not subsample.
BUG=76 TEST=unittests for YUY2ToARGB and UYVYToARGB Review URL: https://webrtc-codereview.appspot.com/763006 git-svn-id: http://libyuv.googlecode.com/svn/trunk@334 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
6343f22bb5
commit
c704f789e9
@ -618,21 +618,21 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
|||||||
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
|
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
|
||||||
src_stride_yuy2 = -src_stride_yuy2;
|
src_stride_yuy2 = -src_stride_yuy2;
|
||||||
}
|
}
|
||||||
void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
|
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||||
uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
|
int pix) = YUY2ToUV422Row_C;
|
||||||
void (*YUY2ToYRow)(const uint8* src_yuy2,
|
void (*YUY2ToYRow)(const uint8* src_yuy2,
|
||||||
uint8* dst_y, int pix) = YUY2ToYRow_C;
|
uint8* dst_y, int pix) = YUY2ToYRow_C;
|
||||||
#if defined(HAS_YUY2TOYROW_SSE2)
|
#if defined(HAS_YUY2TOYROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
if (width > 16) {
|
if (width > 16) {
|
||||||
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
|
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
|
||||||
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
|
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
|
||||||
}
|
}
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
|
YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
|
||||||
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
|
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
|
||||||
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
|
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
|
||||||
YUY2ToUVRow = YUY2ToUVRow_SSE2;
|
YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
|
||||||
YUY2ToYRow = YUY2ToYRow_SSE2;
|
YUY2ToYRow = YUY2ToYRow_SSE2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -665,7 +665,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
|||||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||||
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
YUY2ToUVRow(src_yuy2, 0, rowu, rowv, width);
|
YUY2ToUV422Row(src_yuy2, rowu, rowv, width);
|
||||||
YUY2ToYRow(src_yuy2, rowy, width);
|
YUY2ToYRow(src_yuy2, rowy, width);
|
||||||
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
||||||
src_yuy2 += src_stride_yuy2;
|
src_yuy2 += src_stride_yuy2;
|
||||||
@ -688,21 +688,21 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
|||||||
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
|
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
|
||||||
src_stride_uyvy = -src_stride_uyvy;
|
src_stride_uyvy = -src_stride_uyvy;
|
||||||
}
|
}
|
||||||
void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
|
void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||||
uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
|
int pix) = UYVYToUV422Row_C;
|
||||||
void (*UYVYToYRow)(const uint8* src_uyvy,
|
void (*UYVYToYRow)(const uint8* src_uyvy,
|
||||||
uint8* dst_y, int pix) = UYVYToYRow_C;
|
uint8* dst_y, int pix) = UYVYToYRow_C;
|
||||||
#if defined(HAS_UYVYTOYROW_SSE2)
|
#if defined(HAS_UYVYTOYROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
if (width > 16) {
|
if (width > 16) {
|
||||||
UYVYToUVRow = UYVYToUVRow_Any_SSE2;
|
UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
|
||||||
UYVYToYRow = UYVYToYRow_Any_SSE2;
|
UYVYToYRow = UYVYToYRow_Any_SSE2;
|
||||||
}
|
}
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
|
UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
|
||||||
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
|
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
|
||||||
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
|
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
|
||||||
UYVYToUVRow = UYVYToUVRow_SSE2;
|
UYVYToUV422Row = UYVYToUV422Row_SSE2;
|
||||||
UYVYToYRow = UYVYToYRow_SSE2;
|
UYVYToYRow = UYVYToYRow_SSE2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -733,8 +733,9 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
|||||||
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
|
||||||
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
|
||||||
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
|
||||||
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
UYVYToUVRow(src_uyvy, 0, rowu, rowv, width);
|
UYVYToUV422Row(src_uyvy, rowu, rowv, width);
|
||||||
UYVYToYRow(src_uyvy, rowy, width);
|
UYVYToYRow(src_uyvy, rowy, width);
|
||||||
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
|
||||||
src_uyvy += src_stride_uyvy;
|
src_uyvy += src_stride_uyvy;
|
||||||
|
|||||||
40
source/row.h
40
source/row.h
@ -70,7 +70,6 @@ extern "C" {
|
|||||||
#define HAS_UYVYTOUVROW_SSE2
|
#define HAS_UYVYTOUVROW_SSE2
|
||||||
#define HAS_UYVYTOYROW_SSE2
|
#define HAS_UYVYTOYROW_SSE2
|
||||||
#define HAS_YTOARGBROW_SSE2
|
#define HAS_YTOARGBROW_SSE2
|
||||||
#define HAS_YUY2TOUVROW_SSE2
|
|
||||||
#define HAS_YUY2TOYROW_SSE2
|
#define HAS_YUY2TOYROW_SSE2
|
||||||
#define HAS_ARGBGRAYROW_SSSE3
|
#define HAS_ARGBGRAYROW_SSSE3
|
||||||
#define HAS_ARGBSEPIAROW_SSSE3
|
#define HAS_ARGBSEPIAROW_SSSE3
|
||||||
@ -455,33 +454,48 @@ void I422ToABGRRow_Any_NEON(const uint8* y_buf,
|
|||||||
|
|
||||||
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||||
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_y, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
||||||
uint8* dst_y, int pix);
|
uint8* dst_y, int pix);
|
||||||
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_y, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||||
|
void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToUV422Row_C(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
||||||
|
void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
|
||||||
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||||
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
uint8* dst_y, int pix);
|
uint8* dst_y, int pix);
|
||||||
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix);
|
|
||||||
|
|
||||||
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
|
||||||
uint8* dst_u, uint8* dst_v, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
|
void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
|
||||||
uint8* dst_u, uint8* dst_v, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
|
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||||
|
void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
|
||||||
void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int src_stride_yuy2,
|
|
||||||
uint8* dst_u, uint8* dst_v, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
|
void UYVYToUV422Row_C(const uint8* src_uyvy,
|
||||||
void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int src_stride_uyvy,
|
|
||||||
uint8* dst_u, uint8* dst_v, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
|
||||||
|
void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
|
||||||
|
|
||||||
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|||||||
@ -621,10 +621,10 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
|
|||||||
memcpy(dst, src, count);
|
memcpy(dst, src, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter 2 rows of YUY2 UV's (422) into U and V (420)
|
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
|
||||||
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_v, int width) {
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
// Output a row of UV values, filtering 2 rows of YUY2
|
// Output a row of UV values, filtering 2 rows of YUY2.
|
||||||
for (int x = 0; x < width; x += 2) {
|
for (int x = 0; x < width; x += 2) {
|
||||||
dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
|
dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
|
||||||
dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
|
dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
|
||||||
@ -634,8 +634,22 @@ void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy row of YUY2 UV's (422) into U and V (422).
|
||||||
|
void YUY2ToUV422Row_C(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
|
// Output a row of UV values.
|
||||||
|
for (int x = 0; x < width; x += 2) {
|
||||||
|
dst_u[0] = src_yuy2[1];
|
||||||
|
dst_v[0] = src_yuy2[3];
|
||||||
|
src_yuy2 += 4;
|
||||||
|
dst_u += 1;
|
||||||
|
dst_v += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy row of YUY2 Y's (422) into Y (420/422).
|
||||||
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
|
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
|
||||||
// Copy a row of yuy2 Y values
|
// Output a row of Y values.
|
||||||
for (int x = 0; x < width - 1; x += 2) {
|
for (int x = 0; x < width - 1; x += 2) {
|
||||||
dst_y[x] = src_yuy2[0];
|
dst_y[x] = src_yuy2[0];
|
||||||
dst_y[x + 1] = src_yuy2[2];
|
dst_y[x + 1] = src_yuy2[2];
|
||||||
@ -646,9 +660,10 @@ void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter 2 rows of UYVY UV's (422) into U and V (420).
|
||||||
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_v, int width) {
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
// Copy a row of uyvy UV values
|
// Output a row of UV values.
|
||||||
for (int x = 0; x < width; x += 2) {
|
for (int x = 0; x < width; x += 2) {
|
||||||
dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
|
dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
|
||||||
dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
|
dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
|
||||||
@ -658,15 +673,29 @@ void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UYVYToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
|
// Copy row of UYVY UV's (422) into U and V (422).
|
||||||
// Copy a row of uyvy Y values
|
void UYVYToUV422Row_C(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int width) {
|
||||||
|
// Output a row of UV values.
|
||||||
|
for (int x = 0; x < width; x += 2) {
|
||||||
|
dst_u[0] = src_uyvy[0];
|
||||||
|
dst_v[0] = src_uyvy[2];
|
||||||
|
src_uyvy += 4;
|
||||||
|
dst_u += 1;
|
||||||
|
dst_v += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy row of UYVY Y's (422) into Y (420/422).
|
||||||
|
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
|
||||||
|
// Output a row of Y values.
|
||||||
for (int x = 0; x < width - 1; x += 2) {
|
for (int x = 0; x < width - 1; x += 2) {
|
||||||
dst_y[x] = src_yuy2[1];
|
dst_y[x] = src_uyvy[1];
|
||||||
dst_y[x + 1] = src_yuy2[3];
|
dst_y[x + 1] = src_uyvy[3];
|
||||||
src_yuy2 += 4;
|
src_uyvy += 4;
|
||||||
}
|
}
|
||||||
if (width & 1) {
|
if (width & 1) {
|
||||||
dst_y[width - 1] = src_yuy2[1];
|
dst_y[width - 1] = src_uyvy[1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -910,12 +939,12 @@ YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
|
|||||||
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
|
||||||
#undef YANY
|
#undef YANY
|
||||||
|
|
||||||
#define UVANY(NAMEANY, ARGBTOUV_SSE, ARGBTOUV_C, BPP) \
|
#define UVANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \
|
||||||
void NAMEANY(const uint8* src_argb, int src_stride_argb, \
|
void NAMEANY(const uint8* src_argb, int src_stride_argb, \
|
||||||
uint8* dst_u, uint8* dst_v, int width) { \
|
uint8* dst_u, uint8* dst_v, int width) { \
|
||||||
int n = width & ~15; \
|
int n = width & ~15; \
|
||||||
ARGBTOUV_SSE(src_argb, src_stride_argb, dst_u, dst_v, n); \
|
ANYTOUV_SSE(src_argb, src_stride_argb, dst_u, dst_v, n); \
|
||||||
ARGBTOUV_C(src_argb + n * BPP, src_stride_argb, \
|
ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \
|
||||||
dst_u + (n >> 1), \
|
dst_u + (n >> 1), \
|
||||||
dst_v + (n >> 1), \
|
dst_v + (n >> 1), \
|
||||||
width & 15); \
|
width & 15); \
|
||||||
@ -927,6 +956,24 @@ UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
|
|||||||
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
|
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
|
||||||
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
|
||||||
#undef UVANY
|
#undef UVANY
|
||||||
|
|
||||||
|
#define UV422ANY(NAMEANY, ANYTOUV_SSE, ANYTOUV_C, BPP) \
|
||||||
|
void NAMEANY(const uint8* src_argb, \
|
||||||
|
uint8* dst_u, uint8* dst_v, int width) { \
|
||||||
|
int n = width & ~15; \
|
||||||
|
ANYTOUV_SSE(src_argb, dst_u, dst_v, n); \
|
||||||
|
ANYTOUV_C(src_argb + n * BPP, \
|
||||||
|
dst_u + (n >> 1), \
|
||||||
|
dst_v + (n >> 1), \
|
||||||
|
width & 15); \
|
||||||
|
}
|
||||||
|
|
||||||
|
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, \
|
||||||
|
YUY2ToUV422Row_C, 2)
|
||||||
|
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2, \
|
||||||
|
UYVYToUV422Row_C, 2)
|
||||||
|
#undef UV422ANY
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
|
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
|
||||||
|
|||||||
@ -2114,7 +2114,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrlw $0x8,%%xmm5 \n"
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
@ -2143,7 +2143,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
|||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_yuy2), // %0
|
: "+r"(src_yuy2), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_y), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(pix) // %3
|
"+r"(pix) // %3
|
||||||
: "r"(static_cast<intptr_t>(stride_yuy2)) // %4
|
: "r"(static_cast<intptr_t>(stride_yuy2)) // %4
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
@ -2153,6 +2153,41 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
|
asm volatile (
|
||||||
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
".p2align 4 \n"
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm0 \n"
|
||||||
|
"movdqa 0x10(%0),%%xmm1 \n"
|
||||||
|
"lea 0x20(%0),%0 \n"
|
||||||
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm1 \n"
|
||||||
|
"movq %%xmm0,(%1) \n"
|
||||||
|
"movq %%xmm1,(%1,%2) \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
|
"sub $0x10,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_yuy2), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(pix) // %3
|
||||||
|
:
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
||||||
uint8* dst_y, int pix) {
|
uint8* dst_y, int pix) {
|
||||||
@ -2214,7 +2249,7 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
|
|||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_yuy2), // %0
|
: "+r"(src_yuy2), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_y), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(pix) // %3
|
"+r"(pix) // %3
|
||||||
: "r"(static_cast<intptr_t>(stride_yuy2)) // %4
|
: "r"(static_cast<intptr_t>(stride_yuy2)) // %4
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
@ -2224,6 +2259,42 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
|
asm volatile (
|
||||||
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
".p2align 4 \n"
|
||||||
|
"1: \n"
|
||||||
|
"movdqu (%0),%%xmm0 \n"
|
||||||
|
"movdqu 0x10(%0),%%xmm1 \n"
|
||||||
|
"lea 0x20(%0),%0 \n"
|
||||||
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm1 \n"
|
||||||
|
"movq %%xmm0,(%1) \n"
|
||||||
|
"movq %%xmm1,(%1,%2) \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
|
"sub $0x10,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_yuy2), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(pix) // %3
|
||||||
|
:
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
|
void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
@ -2250,7 +2321,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrlw $0x8,%%xmm5 \n"
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
@ -2279,7 +2350,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_uyvy), // %0
|
: "+r"(src_uyvy), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_y), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(pix) // %3
|
"+r"(pix) // %3
|
||||||
: "r"(static_cast<intptr_t>(stride_uyvy)) // %4
|
: "r"(static_cast<intptr_t>(stride_uyvy)) // %4
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
@ -2289,6 +2360,42 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
|
asm volatile (
|
||||||
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
".p2align 4 \n"
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm0 \n"
|
||||||
|
"movdqa 0x10(%0),%%xmm1 \n"
|
||||||
|
"lea 0x20(%0),%0 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"pand %%xmm5,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm1 \n"
|
||||||
|
"movq %%xmm0,(%1) \n"
|
||||||
|
"movq %%xmm1,(%1,%2) \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
|
"sub $0x10,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_uyvy), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(pix) // %3
|
||||||
|
:
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
uint8* dst_y, int pix) {
|
uint8* dst_y, int pix) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@ -2316,7 +2423,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrlw $0x8,%%xmm5 \n"
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
@ -2345,7 +2452,7 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_uyvy), // %0
|
: "+r"(src_uyvy), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_y), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(pix) // %3
|
"+r"(pix) // %3
|
||||||
: "r"(static_cast<intptr_t>(stride_uyvy)) // %4
|
: "r"(static_cast<intptr_t>(stride_uyvy)) // %4
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
@ -2354,6 +2461,42 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
|
asm volatile (
|
||||||
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
"psrlw $0x8,%%xmm5 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
".p2align 4 \n"
|
||||||
|
"1: \n"
|
||||||
|
"movdqu (%0),%%xmm0 \n"
|
||||||
|
"movdqu 0x10(%0),%%xmm1 \n"
|
||||||
|
"lea 0x20(%0),%0 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"pand %%xmm5,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm1 \n"
|
||||||
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm1 \n"
|
||||||
|
"movq %%xmm0,(%1) \n"
|
||||||
|
"movq %%xmm1,(%1,%2) \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
|
"sub $0x10,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_uyvy), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(pix) // %3
|
||||||
|
:
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
#endif // HAS_YUY2TOYROW_SSE2
|
#endif // HAS_YUY2TOYROW_SSE2
|
||||||
|
|
||||||
#ifdef HAS_ARGBBLENDROW_SSE2
|
#ifdef HAS_ARGBBLENDROW_SSE2
|
||||||
|
|||||||
@ -2198,7 +2198,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2,
|
|||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
@ -2240,6 +2240,43 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSE2 (MSVC naked function): split the chroma of one YUY2 row into U and V
// rows without reading a second row.
// Every loop iteration loads 32 source bytes, stores 8 U bytes and 8 V
// bytes, and subtracts 16 from pix; the loop body runs at least once and
// repeats while pix stays positive.
// The source is loaded with movdqa, so src_yuy2 must be 16-byte aligned.
__declspec(naked) __declspec(align(16))
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
                         uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       edi                   // saved here, restored before ret
    // Arguments sit above the saved edi (4 bytes) and the return address.
    mov        eax, [esp + 4 + 4]    // src_yuy2
    mov        edx, [esp + 4 + 8]    // dst_u
    mov        edi, [esp + 4 + 12]   // dst_v
    mov        ecx, [esp + 4 + 16]   // pix
    pcmpeqb    xmm5, xmm5            // build the 0x00ff00ff byte mask
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u

    align      16
  convertloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    lea        eax,  [eax + 32]
    psrlw      xmm0, 8               // keep the odd bytes: YUYV -> UVUV
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0         // 8 U bytes
    movq       qword ptr [edx + edi], xmm1   // 8 V bytes
    lea        edx, [edx + 8]
    sub        ecx, 16
    jg         convertloop

    pop        edi
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
||||||
uint8* dst_y, int pix) {
|
uint8* dst_y, int pix) {
|
||||||
@ -2268,7 +2305,7 @@ void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
|
|||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
@ -2310,6 +2347,43 @@ void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSE2 (MSVC naked function): split the chroma of one YUY2 row into U and V
// rows without reading a second row.  Same instruction sequence as the
// aligned variant except that the source is loaded with movdqu, so src_yuy2
// needs no particular alignment.
// Every loop iteration loads 32 source bytes, stores 8 U bytes and 8 V
// bytes, and subtracts 16 from pix; the loop body runs at least once and
// repeats while pix stays positive.
__declspec(naked) __declspec(align(16))
void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
                                   uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       edi                   // saved here, restored before ret
    // Arguments sit above the saved edi (4 bytes) and the return address.
    mov        eax, [esp + 4 + 4]    // src_yuy2
    mov        edx, [esp + 4 + 8]    // dst_u
    mov        edi, [esp + 4 + 12]   // dst_v
    mov        ecx, [esp + 4 + 16]   // pix
    pcmpeqb    xmm5, xmm5            // build the 0x00ff00ff byte mask
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u

    align      16
  convertloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax,  [eax + 32]
    psrlw      xmm0, 8               // keep the odd bytes: YUYV -> UVUV
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0         // 8 U bytes
    movq       qword ptr [edx + edi], xmm1   // 8 V bytes
    lea        edx, [edx + 8]
    sub        ecx, 16
    jg         convertloop

    pop        edi
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void UYVYToYRow_SSE2(const uint8* src_uyvy,
|
void UYVYToYRow_SSE2(const uint8* src_uyvy,
|
||||||
uint8* dst_y, int pix) {
|
uint8* dst_y, int pix) {
|
||||||
@ -2336,7 +2410,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy,
|
|||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
@ -2378,6 +2452,43 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSE2 (MSVC naked function): split the chroma of one UYVY row into U and V
// rows without reading a second row.
// Every loop iteration loads 32 source bytes, stores 8 U bytes and 8 V
// bytes, and subtracts 16 from pix; the loop body runs at least once and
// repeats while pix stays positive.
// The source is loaded with movdqa, so src_uyvy must be 16-byte aligned.
__declspec(naked) __declspec(align(16))
void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
                         uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    push       edi                   // saved here, restored before ret
    // Arguments sit above the saved edi (4 bytes) and the return address.
    mov        eax, [esp + 4 + 4]    // src_uyvy
    mov        edx, [esp + 4 + 8]    // dst_u
    mov        edi, [esp + 4 + 12]   // dst_v
    mov        ecx, [esp + 4 + 16]   // pix
    pcmpeqb    xmm5, xmm5            // build the 0x00ff00ff byte mask
    psrlw      xmm5, 8
    sub        edi, edx              // edi = dst_v - dst_u

    align      16
  convertloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    lea        eax,  [eax + 32]
    pand       xmm0, xmm5            // keep the even bytes: UYVY -> UVUV
    pand       xmm1, xmm5
    packuswb   xmm0, xmm1
    movdqa     xmm1, xmm0
    pand       xmm0, xmm5            // U
    packuswb   xmm0, xmm0
    psrlw      xmm1, 8               // V
    packuswb   xmm1, xmm1
    movq       qword ptr [edx], xmm0         // 8 U bytes
    movq       qword ptr [edx + edi], xmm1   // 8 V bytes
    lea        edx, [edx + 8]
    sub        ecx, 16
    jg         convertloop

    pop        edi
    ret
  }
}
|
||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
uint8* dst_y, int pix) {
|
uint8* dst_y, int pix) {
|
||||||
@ -2404,7 +2515,7 @@ void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
|
|||||||
|
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
||||||
uint8* dst_u, uint8* dst_y, int pix) {
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
__asm {
|
__asm {
|
||||||
push esi
|
push esi
|
||||||
push edi
|
push edi
|
||||||
@ -2445,6 +2556,43 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unaligned-source variant of the UYVY chroma split: separates the U and V
// bytes of one UYVY row into dst_u and dst_v.
// Takes a single source row (no stride argument), so no second row is read
// or averaged. Each loop iteration reads 32 source bytes with movdqu, writes
// 8 bytes to dst_u and 8 bytes to dst_v, and subtracts 16 from pix; the loop
// repeats while pix remains greater than zero.
__declspec(naked) __declspec(align(16))
|
||||||
|
void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
|
||||||
|
uint8* dst_u, uint8* dst_v, int pix) {
|
||||||
|
__asm {
|
||||||
|
push edi  // saved here, restored before ret; shifts argument offsets by 4
|
||||||
|
mov eax, [esp + 4 + 4] // src_uyvy
|
||||||
|
mov edx, [esp + 4 + 8] // dst_u
|
||||||
|
mov edi, [esp + 4 + 12] // dst_v
|
||||||
|
mov ecx, [esp + 4 + 16] // pix
|
||||||
|
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
|
||||||
|
psrlw xmm5, 8
|
||||||
|
sub edi, edx  // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
|
||||||
|
|
||||||
|
align 16
|
||||||
|
convertloop:
|
||||||
|
movdqu xmm0, [eax]  // unaligned load of first 16 source bytes
|
||||||
|
movdqu xmm1, [eax + 16]  // unaligned load of next 16 source bytes
|
||||||
|
lea eax, [eax + 32]  // advance source by 32 bytes
|
||||||
|
pand xmm0, xmm5 // UYVY -> UVUV
|
||||||
|
pand xmm1, xmm5
|
||||||
|
packuswb xmm0, xmm1  // low bytes of each word -> 16 bytes of interleaved U,V
|
||||||
|
movdqa xmm1, xmm0  // register-to-register copy; xmm1 will become V
|
||||||
|
pand xmm0, xmm5 // U
|
||||||
|
packuswb xmm0, xmm0
|
||||||
|
psrlw xmm1, 8 // V
|
||||||
|
packuswb xmm1, xmm1
|
||||||
|
movq qword ptr [edx], xmm0  // store 8 U bytes
|
||||||
|
movq qword ptr [edx + edi], xmm1  // store 8 V bytes
|
||||||
|
lea edx, [edx + 8]  // advance dst_u (and dst_v via the edi offset) by 8
|
||||||
|
sub ecx, 16  // 16 pixels consumed per iteration
|
||||||
|
jg convertloop
|
||||||
|
|
||||||
|
pop edi
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif // HAS_YUY2TOYROW_SSE2
|
#endif // HAS_YUY2TOYROW_SSE2
|
||||||
|
|
||||||
#ifdef HAS_ARGBBLENDROW_SSE2
|
#ifdef HAS_ARGBBLENDROW_SSE2
|
||||||
|
|||||||
@ -31,11 +31,12 @@ namespace libyuv {
|
|||||||
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
|
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
|
||||||
const int kWidth = 1280; \
|
const int kWidth = 1280; \
|
||||||
const int kHeight = 720; \
|
const int kHeight = 720; \
|
||||||
|
const int kStride = (kWidth * 8 * BPP_B + 7) / 8; \
|
||||||
align_buffer_16(src_y, kWidth * kHeight); \
|
align_buffer_16(src_y, kWidth * kHeight); \
|
||||||
align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
||||||
align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
||||||
align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
|
align_buffer_16(dst_argb_c, kStride * kHeight); \
|
||||||
align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
|
align_buffer_16(dst_argb_opt, kStride * kHeight); \
|
||||||
srandom(time(NULL)); \
|
srandom(time(NULL)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kWidth; ++j) \
|
for (int j = 0; j < kWidth; ++j) \
|
||||||
@ -49,16 +50,16 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
|
|||||||
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
||||||
src_u, kWidth / SUBSAMP_X, \
|
src_u, kWidth / SUBSAMP_X, \
|
||||||
src_v, kWidth / SUBSAMP_X, \
|
src_v, kWidth / SUBSAMP_X, \
|
||||||
dst_argb_c, kWidth * BPP_B, \
|
dst_argb_c, kStride, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(-1); \
|
MaskCpuFlags(-1); \
|
||||||
const int runs = 1000; \
|
const int runs = 1000; \
|
||||||
for (int i = 0; i < runs; ++i) { \
|
for (int i = 0; i < runs; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
||||||
src_u, kWidth / SUBSAMP_X, \
|
src_u, kWidth / SUBSAMP_X, \
|
||||||
src_v, kWidth / SUBSAMP_X, \
|
src_v, kWidth / SUBSAMP_X, \
|
||||||
dst_argb_opt, kWidth * BPP_B, \
|
dst_argb_opt, kStride, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
int max_diff = 0; \
|
int max_diff = 0; \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
@ -96,7 +97,7 @@ TESTPLANARTOB(I422, 2, 1, ARGB, 4)
|
|||||||
TESTPLANARTOB(I444, 1, 1, ARGB, 4)
|
TESTPLANARTOB(I444, 1, 1, ARGB, 4)
|
||||||
TESTPLANARTOB(I420, 2, 2, YUY2, 2)
|
TESTPLANARTOB(I420, 2, 2, YUY2, 2)
|
||||||
TESTPLANARTOB(I420, 2, 2, UYVY, 2)
|
TESTPLANARTOB(I420, 2, 2, UYVY, 2)
|
||||||
TESTPLANARTOB(I420, 2, 2, V210, 45 / 16)
|
TESTPLANARTOB(I420, 2, 2, V210, 16 / 6)
|
||||||
TESTPLANARTOB(I420, 2, 2, I400, 1)
|
TESTPLANARTOB(I420, 2, 2, I400, 1)
|
||||||
TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1)
|
TESTPLANARTOB(I420, 2, 2, BayerBGGR, 1)
|
||||||
TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1)
|
TESTPLANARTOB(I420, 2, 2, BayerRGGB, 1)
|
||||||
@ -124,14 +125,14 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) { \
|
|||||||
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
||||||
src_uv, kWidth / SUBSAMP_X * 2, \
|
src_uv, kWidth / SUBSAMP_X * 2, \
|
||||||
dst_argb_c, kWidth * BPP_B, \
|
dst_argb_c, kWidth * BPP_B, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(-1); \
|
MaskCpuFlags(-1); \
|
||||||
const int runs = 1000; \
|
const int runs = 1000; \
|
||||||
for (int i = 0; i < runs; ++i) { \
|
for (int i = 0; i < runs; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
|
||||||
src_uv, kWidth / SUBSAMP_X * 2, \
|
src_uv, kWidth / SUBSAMP_X * 2, \
|
||||||
dst_argb_opt, kWidth * BPP_B, \
|
dst_argb_opt, kWidth * BPP_B, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
int max_diff = 0; \
|
int max_diff = 0; \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
@ -164,7 +165,8 @@ TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2)
|
|||||||
TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \
|
TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \
|
||||||
const int kWidth = 1280; \
|
const int kWidth = 1280; \
|
||||||
const int kHeight = 720; \
|
const int kHeight = 720; \
|
||||||
align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \
|
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
|
||||||
|
align_buffer_16(src_argb, kStride * kHeight); \
|
||||||
align_buffer_16(dst_y_c, kWidth * kHeight); \
|
align_buffer_16(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
||||||
align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
||||||
@ -173,22 +175,22 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N##_OptVsC) { \
|
|||||||
align_buffer_16(dst_v_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
align_buffer_16(dst_v_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
|
||||||
srandom(time(NULL)); \
|
srandom(time(NULL)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kWidth * BPP_A; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
src_argb[(i * kWidth * BPP_A) + j] = (random() & 0xff); \
|
src_argb[(i * kStride) + j] = (random() & 0xff); \
|
||||||
MaskCpuFlags(kCpuInitialized); \
|
MaskCpuFlags(kCpuInitialized); \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb, kWidth * BPP_A, \
|
FMT_A##To##FMT_PLANAR(src_argb, kStride, \
|
||||||
dst_y_c, kWidth, \
|
dst_y_c, kWidth, \
|
||||||
dst_u_c, kWidth / SUBSAMP_X, \
|
dst_u_c, kWidth / SUBSAMP_X, \
|
||||||
dst_v_c, kWidth / SUBSAMP_X, \
|
dst_v_c, kWidth / SUBSAMP_X, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(-1); \
|
MaskCpuFlags(-1); \
|
||||||
const int runs = 1000; \
|
const int runs = 1000; \
|
||||||
for (int i = 0; i < runs; ++i) { \
|
for (int i = 0; i < runs; ++i) { \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb, kWidth * BPP_A, \
|
FMT_A##To##FMT_PLANAR(src_argb, kStride, \
|
||||||
dst_y_opt, kWidth, \
|
dst_y_opt, kWidth, \
|
||||||
dst_u_opt, kWidth / SUBSAMP_X, \
|
dst_u_opt, kWidth / SUBSAMP_X, \
|
||||||
dst_v_opt, kWidth / SUBSAMP_X, \
|
dst_v_opt, kWidth / SUBSAMP_X, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
int max_diff = 0; \
|
int max_diff = 0; \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
@ -251,7 +253,7 @@ TESTATOPLANAR(ARGB, 4, I422, 2, 1)
|
|||||||
// TODO(fbarchard): Implement and test 411 and 444
|
// TODO(fbarchard): Implement and test 411 and 444
|
||||||
TESTATOPLANAR(YUY2, 2, I420, 2, 2)
|
TESTATOPLANAR(YUY2, 2, I420, 2, 2)
|
||||||
TESTATOPLANAR(UYVY, 2, I420, 2, 2)
|
TESTATOPLANAR(UYVY, 2, I420, 2, 2)
|
||||||
TESTATOPLANAR(V210, 45 / 16, I420, 2, 2)
|
TESTATOPLANAR(V210, 16 / 6, I420, 2, 2)
|
||||||
TESTATOPLANAR(I400, 1, I420, 2, 2)
|
TESTATOPLANAR(I400, 1, I420, 2, 2)
|
||||||
TESTATOPLANAR(BayerBGGR, 1, I420, 2, 2)
|
TESTATOPLANAR(BayerBGGR, 1, I420, 2, 2)
|
||||||
TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2)
|
TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2)
|
||||||
@ -272,13 +274,13 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N##_OptVsC) { \
|
|||||||
MaskCpuFlags(kCpuInitialized); \
|
MaskCpuFlags(kCpuInitialized); \
|
||||||
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
|
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
|
||||||
dst_argb_c, kWidth * BPP_B, \
|
dst_argb_c, kWidth * BPP_B, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(-1); \
|
MaskCpuFlags(-1); \
|
||||||
const int runs = 1000; \
|
const int runs = 1000; \
|
||||||
for (int i = 0; i < runs; ++i) { \
|
for (int i = 0; i < runs; ++i) { \
|
||||||
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
|
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
|
||||||
dst_argb_opt, kWidth * BPP_B, \
|
dst_argb_opt, kWidth * BPP_B, \
|
||||||
kWidth, NEG##kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
int max_diff = 0; \
|
int max_diff = 0; \
|
||||||
for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
|
for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user