Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2025-12-06 16:56:55 +08:00)
Recomputed the JPeg coefficients, normalized to 128, and applied them to the ARGBGray function, reusing the YJ function/coefficients and rounding.

BUG=201
TESTED=Gray unittest improved
Review URL: https://webrtc-codereview.appspot.com/1269006
git-svn-id: http://libyuv.googlecode.com/svn/trunk@629 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 6a352141ef
commit 050b39a5cb
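Before the per-file hunks, a quick recap of the arithmetic behind the change. This is an illustrative sketch, not code from the commit: it reproduces the coefficient recomputation described in the message and in the new row_common.cc comments (JPeg/BT.601 full-range luma weights scaled by 128 and rounded).

#include <cmath>
#include <cstdio>

int main() {
  const double kR = 0.29900, kG = 0.58700, kB = 0.11400;
  // Scale each weight by 128 and round to the nearest integer.
  std::printf("r: %.3f -> %ld\n", kR * 128, std::lround(kR * 128));  // 38.272 -> 38
  std::printf("g: %.3f -> %ld\n", kG * 128, std::lround(kG * 128));  // 75.136 -> 75
  std::printf("b: %.3f -> %ld\n", kB * 128, std::lround(kB * 128));  // 14.592 -> 15
  // 38 + 75 + 15 == 128, so (38*r + 75*g + 15*b + 64) >> 7 spans the full
  // 0..255 range, which is why black and white now convert losslessly.
  return 0;
}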
README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 628
+Version: 629
 License: BSD
 License File: LICENSE
include/libyuv/row.h
@@ -54,6 +54,7 @@ extern "C" {
 #define HAS_ARGBTOUV422ROW_SSSE3
 #define HAS_ARGBTOUV444ROW_SSSE3
 #define HAS_ARGBTOUVROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
 #define HAS_ARGBTOYROW_SSSE3
 #define HAS_ARGBTOYJROW_SSSE3
 #define HAS_BGRATOUVROW_SSSE3
@@ -203,6 +204,7 @@ extern "C" {
 #define HAS_ARGBTOUV422ROW_NEON
 #define HAS_ARGBTOUV444ROW_NEON
 #define HAS_ARGBTOUVROW_NEON
+#define HAS_ARGBTOUVJROW_NEON
 #define HAS_ARGBTOYROW_NEON
 #define HAS_ARGBTOYJROW_NEON
 #define HAS_BGRATOUVROW_NEON
@@ -423,6 +425,8 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                          int pix);
 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int pix);
+void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int pix);
 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
                       uint8* dst_u, uint8* dst_v, int pix);
 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
@@ -481,6 +485,8 @@ void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
                           uint8* dst_u, uint8* dst_v, int width);
 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
                        uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width);
 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
                        uint8* dst_u, uint8* dst_v, int width);
 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
@@ -489,6 +495,8 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
                        uint8* dst_u, uint8* dst_v, int width);
 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
+                                  uint8* dst_u, uint8* dst_v, int width);
 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
                                  uint8* dst_u, uint8* dst_v, int width);
 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
@@ -497,6 +505,8 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
                                  uint8* dst_u, uint8* dst_v, int width);
 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
                            uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
+                            uint8* dst_u, uint8* dst_v, int width);
 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
                            uint8* dst_u, uint8* dst_v, int width);
 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
@@ -511,6 +521,8 @@ void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                              int pix);
 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
                           uint8* dst_u, uint8* dst_v, int pix);
+void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
+                           uint8* dst_u, uint8* dst_v, int pix);
 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
                           uint8* dst_u, uint8* dst_v, int pix);
 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
@@ -531,6 +543,8 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
                               uint8* dst_u, uint8* dst_v, int pix);
 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_u, uint8* dst_v, int width);
+void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_u, uint8* dst_v, int width);
 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
                    uint8* dst_u, uint8* dst_v, int width);
 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 628
+#define LIBYUV_VERSION 629

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
source/convert_from_argb.cc
@@ -994,19 +994,19 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
     src_argb = src_argb + (height - 1) * src_stride_argb;
     src_stride_argb = -src_stride_argb;
   }
-  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
-                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
   void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
       ARGBToYJRow_C;
 #if defined(HAS_ARGBTOYJROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
     ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
     if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
+      ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
       ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-        ARGBToUVRow = ARGBToUVRow_SSSE3;
+        ARGBToUVJRow = ARGBToUVJRow_SSSE3;
         if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
           ARGBToYJRow = ARGBToYJRow_SSSE3;
         }
@@ -1021,16 +1021,16 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
       ARGBToYJRow = ARGBToYJRow_NEON;
     }
     if (width >= 16) {
-      ARGBToUVRow = ARGBToUVRow_Any_NEON;
+      ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
       if (IS_ALIGNED(width, 16)) {
-        ARGBToUVRow = ARGBToUVRow_NEON;
+        ARGBToUVJRow = ARGBToUVJRow_NEON;
       }
     }
   }
 #endif

   for (int y = 0; y < height - 1; y += 2) {
-    ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
+    ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
     ARGBToYJRow(src_argb, dst_yj, width);
     ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
     src_argb += src_stride_argb * 2;
@@ -1039,7 +1039,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
     dst_v += dst_stride_v;
   }
   if (height & 1) {
-    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
+    ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
     ARGBToYJRow(src_argb, dst_yj, width);
   }
   return 0;
source/row_any.cc
@@ -293,6 +293,8 @@ UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
 #endif
 #ifdef HAS_ARGBTOUVROW_SSSE3
 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
+UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
+      4, 15)
 UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
 UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
@@ -301,6 +303,7 @@ UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
 #endif
 #ifdef HAS_ARGBTOUVROW_NEON
 UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
+UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
 UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
 UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
 UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
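For context (the definition is not part of this diff), the UVANY macro generates the _Any_ wrappers registered above. A rough, hypothetical sketch of the pattern, with the signature assumed from the call sites in this hunk: the SIMD row handles the largest width the kernel supports, and the C row finishes the remainder.

// Hypothetical sketch of the UVANY wrapper pattern (not the actual source).
// BPP is bytes per source pixel; MASK is the kernel granularity minus one.
#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                     \
  void NAMEANY(const uint8* src_argb, int src_stride_argb,                     \
               uint8* dst_u, uint8* dst_v, int width) {                        \
    int n = width & ~MASK;  /* round width down to kernel granularity */      \
    ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n);                  \
    ANYTOUV_C(src_argb + n * BPP, src_stride_argb,                             \
              dst_u + (n >> 1), dst_v + (n >> 1), width & MASK);               \
  }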
source/row_common.cc
@@ -256,25 +256,44 @@ MAKEROWY(RGB24, 2, 1, 0, 3)
 MAKEROWY(RAW, 0, 1, 2, 3)
 #undef MAKEROWY

-// BT.601 mpeg range
+// JPeg uses a variation on BT.601-1 full range
+// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
+// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
+// v =  0.50000 * r - 0.41869 * g - 0.08131 * b + center
+// BT.601 Mpeg range uses:
 // b 0.1016 * 255 = 25.908 = 25
 // g 0.5078 * 255 = 129.489 = 129
 // r 0.2578 * 255 = 65.739 = 66
-// = 0.8672. 1/.8672 = 1.1531
-// BT.601 full range 8 bit (not used)
-// b 0.1016 * 1.1531 = 0.1172 * 255 = 29.886 = 30
-// g 0.5078 * 1.1531 = 0.5855 * 255 = 149.3025 = 149
-// r 0.2578 * 1.1531 = 0.2973 * 255 = 75.8115 = 76
-// 30 + 149 + 76 = 255
-// BT.601 full range 7 bit
-// b 0.1172 * 127 = 14.8844 = 15
-// g 0.5855 * 127 = 74.35855 = 74
-// r 0.2973 * 127 = 37.7571 = 38
+// JPeg 8 bit Y (not used):
+// b 0.11400 * 256 = 29.184 = 29
+// g 0.58700 * 256 = 150.272 = 150
+// r 0.29900 * 256 = 76.544 = 77
+// JPeg 7 bit Y:
+// b 0.11400 * 128 = 14.592 = 15
+// g 0.58700 * 128 = 75.136 = 75
+// r 0.29900 * 128 = 38.272 = 38
+// JPeg 8 bit U:
+// b  0.50000 * 255 = 127.5 = 127
+// g -0.33126 * 255 = -84.4713 = -84
+// r -0.16874 * 255 = -43.0287 = -43
+// JPeg 8 bit V:
+// b -0.08131 * 255 = -20.73405 = -20
+// g -0.41869 * 255 = -106.76595 = -107
+// r  0.50000 * 255 = 127.5 = 127

 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
-  return (38 * r + 74 * g + 15 * b + 64) >> 7;
+  return (38 * r + 75 * g + 15 * b + 64) >> 7;
 }

+static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
+  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
+}
+static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
+  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
+}
+
+#define AVGB(a, b) (((a) + (b) + 1) >> 1)
+
 #define MAKEROWYJ(NAME, R, G, B, BPP) \
 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
   for (int x = 0; x < width; ++x) { \
@@ -283,6 +302,31 @@ void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
     dst_y += 1; \
   } \
 } \
+void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
+                        uint8* dst_u, uint8* dst_v, int width) { \
+  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
+  for (int x = 0; x < width - 1; x += 2) { \
+    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
+                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
+    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
+                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
+    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
+                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
+    dst_u[0] = RGBToUJ(ar, ag, ab); \
+    dst_v[0] = RGBToVJ(ar, ag, ab); \
+    src_rgb0 += BPP * 2; \
+    src_rgb1 += BPP * 2; \
+    dst_u += 1; \
+    dst_v += 1; \
+  } \
+  if (width & 1) { \
+    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
+    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
+    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
+    dst_u[0] = RGBToUJ(ar, ag, ab); \
+    dst_v[0] = RGBToVJ(ar, ag, ab); \
+  } \
+}

 MAKEROWYJ(ARGB, 2, 1, 0, 4)
 #undef MAKEROWYJ
@@ -537,16 +581,9 @@ void ARGBToUV411Row_C(const uint8* src_argb,
   }
 }

-// http://en.wikipedia.org/wiki/Grayscale.
-// 0.11 * B + 0.59 * G + 0.30 * R
-// Coefficients rounded to multiple of 2 for consistency with SSSE3 version.
-static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) {
-  return (28 * b + 152 * g + 76 * r) >> 8;
-}
-
 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
   for (int x = 0; x < width; ++x) {
-    uint8 y = RGBToGray(src_argb[2], src_argb[1], src_argb[0]);
+    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
     dst_argb[3] = src_argb[3];
     dst_argb += 4;
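To make the macro above easier to follow, here is a non-macro rendering of what MAKEROWYJ(ARGB, 2, 1, 0, 4) generates for a single 2x2 block (function and parameter names are illustrative; in ARGB the byte order is B=0, G=1, R=2). Note the averaging order: vertical pairs first, then the horizontal pair.

typedef unsigned char uint8;

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

static int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}

// One U/V sample from a 2x2 block of ARGB pixels. row0 and row1 point at the
// two source rows; each holds two 4-byte ARGB pixels (8 bytes).
static void UVJFor2x2(const uint8* row0, const uint8* row1,
                      uint8* u, uint8* v) {
  uint8 ab = AVGB(AVGB(row0[0], row1[0]), AVGB(row0[4], row1[4]));  // B
  uint8 ag = AVGB(AVGB(row0[1], row1[1]), AVGB(row0[5], row1[5]));  // G
  uint8 ar = AVGB(AVGB(row0[2], row1[2]), AVGB(row0[6], row1[6]));  // R
  *u = (uint8)RGBToUJ(ar, ag, ab);
  *v = (uint8)RGBToVJ(ar, ag, ab);
}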
source/row_neon.cc
@@ -1338,9 +1338,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {

 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
   asm volatile (
-    "vmov.u8    d24, #15                       \n"  // B * 0.1172 coefficient
-    "vmov.u8    d25, #74                       \n"  // G * 0.5855 coefficient
-    "vmov.u8    d26, #38                       \n"  // R * 0.2973 coefficient
+    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
+    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
     ".p2align  2                               \n"
   "1:                                          \n"
     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
@@ -1348,7 +1348,7 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
     "vmull.u8   q2, d0, d24                    \n"  // B
     "vmlal.u8   q2, d1, d25                    \n"  // G
     "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit Y
     "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
     "bgt        1b                             \n"
   : "+r"(src_argb),  // %0
@@ -1547,6 +1547,45 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
   );
 }

+// TODO(fbarchard): Subsample match C code.
+void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (
+    "add        %1, %0, %1                     \n"  // src_stride + src_argb
+    "vmov.s16   q10, #127 / 4                  \n"  // UB / VR 0.500 coefficient
+    "vmov.s16   q11, #84 / 4                   \n"  // UG -0.33126 coefficient
+    "vmov.s16   q12, #43 / 4                   \n"  // UR -0.16874 coefficient
+    "vmov.s16   q13, #20 / 4                   \n"  // VB -0.08131 coefficient
+    "vmov.s16   q14, #107 / 4                  \n"  // VG -0.41869 coefficient
+    "vmov.u16   q15, #0x8080                   \n"  // 128.5
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
+    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
+    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
+    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
+    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
+    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more ARGB pixels.
+    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 ARGB pixels.
+    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
+    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
+    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
+    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
+    RGBTOUV(q0, q1, q2)
+    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
+    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
+    "bgt        1b                             \n"
+  : "+r"(src_argb),  // %0
+    "+r"(src_stride_argb),  // %1
+    "+r"(dst_u),  // %2
+    "+r"(dst_v),  // %3
+    "+r"(pix)  // %4
+  :
+  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+  );
+}
+
 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
                       uint8* dst_u, uint8* dst_v, int pix) {
   asm volatile (
@@ -2365,13 +2404,13 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
 }

 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
-// Similar to ARGBToY but different constants, no round and stores ARGB.
-// C code is (28 * b + 152 * g + 76 * r) >> 8;
+// Similar to ARGBToYJ but stores ARGB.
+// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
   asm volatile (
-    "vmov.u8    d24, #14                       \n"  // B * 0.1016 coefficient
-    "vmov.u8    d25, #76                       \n"  // G * 0.5078 coefficient
-    "vmov.u8    d26, #38                       \n"  // R * 0.2578 coefficient
+    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
+    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
+    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
     ".p2align  2                               \n"
   "1:                                          \n"
     "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
@@ -2379,7 +2418,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
     "vmull.u8   q2, d0, d24                    \n"  // B
     "vmlal.u8   q2, d1, d25                    \n"  // G
     "vmlal.u8   q2, d2, d26                    \n"  // R
-    "vqshrun.s16 d0, q2, #7                    \n"  // 16 bit to 8 bit B
+    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit B
     "vmov       d1, d0                         \n"  // G
     "vmov       d2, d0                         \n"  // R
     "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
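Two details in the NEON hunks are worth spelling out. The accumulator comments change from "16 bit" to "15 bit" because the new coefficients sum to exactly 128, so the dot product peaks at 128 * 255 = 32640, which fits in 15 bits. And ARGBGrayRow_NEON switches from vqshrun to vqrshrun, the rounding variant of the saturating narrow, matching the +64 term in the C code. A scalar model of the difference (illustrative, assuming 16-bit accumulators and a shift of 7):

#include <cstdint>

// Scalar models of the two NEON narrows used here.
static uint8_t vqshrun7(int16_t x) {   // truncating: old ARGBGrayRow behavior
  int v = x >> 7;
  return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
}
static uint8_t vqrshrun7(int16_t x) {  // rounding: new behavior
  int v = (x + 64) >> 7;               // +64 is half of 1 << 7
  return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
}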
source/row_posix.cc
@@ -37,17 +37,25 @@ CONST vec8 kARGBToY = {

 // JPeg full range.
 CONST vec8 kARGBToYJ = {
-  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
 };

 CONST vec8 kARGBToU = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
 };

+CONST vec8 kARGBToUJ = {
+  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
+};
+
 CONST vec8 kARGBToV = {
   -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
 };

+CONST vec8 kARGBToVJ = {
+  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
+};
+
 // Constants for BGRA
 CONST vec8 kBGRAToY = {
   0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
@@ -100,6 +108,10 @@ CONST uvec8 kAddUV128 = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
 };

+CONST uvec16 kAddUVJ128 = {
+  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
+};
+
 // Shuffle table for converting RGB24 to ARGB.
 CONST uvec8 kShuffleMaskRGB24ToARGB = {
   0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
@@ -830,6 +842,69 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   );
 }

+// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
+void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa     %0,%%xmm4                      \n"
+    "movdqa     %1,%%xmm3                      \n"
+    "movdqa     %2,%%xmm5                      \n"
+  :
+  : "m"(kARGBToUJ),  // %0
+    "m"(kARGBToVJ),  // %1
+    "m"(kAddUVJ128)  // %2
+  );
+  asm volatile (
+    "sub        %1,%2                          \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqa     (%0),%%xmm0                    \n"
+    "movdqa     0x10(%0),%%xmm1                \n"
+    "movdqa     0x20(%0),%%xmm2                \n"
+    "movdqa     0x30(%0),%%xmm6                \n"
+    "pavgb      (%0,%4,1),%%xmm0               \n"
+    "pavgb      0x10(%0,%4,1),%%xmm1           \n"
+    "pavgb      0x20(%0,%4,1),%%xmm2           \n"
+    "pavgb      0x30(%0,%4,1),%%xmm6           \n"
+    "lea        0x40(%0),%0                    \n"
+    "movdqa     %%xmm0,%%xmm7                  \n"
+    "shufps     $0x88,%%xmm1,%%xmm0            \n"
+    "shufps     $0xdd,%%xmm1,%%xmm7            \n"
+    "pavgb      %%xmm7,%%xmm0                  \n"
+    "movdqa     %%xmm2,%%xmm7                  \n"
+    "shufps     $0x88,%%xmm6,%%xmm2            \n"
+    "shufps     $0xdd,%%xmm6,%%xmm7            \n"
+    "pavgb      %%xmm7,%%xmm2                  \n"
+    "movdqa     %%xmm0,%%xmm1                  \n"
+    "movdqa     %%xmm2,%%xmm6                  \n"
+    "pmaddubsw  %%xmm4,%%xmm0                  \n"
+    "pmaddubsw  %%xmm4,%%xmm2                  \n"
+    "pmaddubsw  %%xmm3,%%xmm1                  \n"
+    "pmaddubsw  %%xmm3,%%xmm6                  \n"
+    "phaddw     %%xmm2,%%xmm0                  \n"
+    "phaddw     %%xmm6,%%xmm1                  \n"
+    "paddw      %%xmm5,%%xmm0                  \n"
+    "paddw      %%xmm5,%%xmm1                  \n"
+    "psraw      $0x8,%%xmm0                    \n"
+    "psraw      $0x8,%%xmm1                    \n"
+    "packsswb   %%xmm1,%%xmm0                  \n"
+    "sub        $0x10,%3                       \n"
+    "movlps     %%xmm0,(%1)                    \n"
+    "movhps     %%xmm0,(%1,%2,1)               \n"
+    "lea        0x8(%1),%1                     \n"
+    "jg         1b                             \n"
+  : "+r"(src_argb0),  // %0
+    "+r"(dst_u),  // %1
+    "+r"(dst_v),  // %2
+    "+rm"(width)  // %3
+  : "r"(static_cast<intptr_t>(src_stride_argb))
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
                                  uint8* dst_u, uint8* dst_v, int width) {
   asm volatile (
@@ -895,6 +970,72 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   );
 }

+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                  uint8* dst_u, uint8* dst_v, int width) {
+  asm volatile (
+    "movdqa     %0,%%xmm4                      \n"
+    "movdqa     %1,%%xmm3                      \n"
+    "movdqa     %2,%%xmm5                      \n"
+  :
+  : "m"(kARGBToUJ),  // %0
+    "m"(kARGBToVJ),  // %1
+    "m"(kAddUVJ128)  // %2
+  );
+  asm volatile (
+    "sub        %1,%2                          \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqu     (%0),%%xmm0                    \n"
+    "movdqu     0x10(%0),%%xmm1                \n"
+    "movdqu     0x20(%0),%%xmm2                \n"
+    "movdqu     0x30(%0),%%xmm6                \n"
+    "movdqu     (%0,%4,1),%%xmm7               \n"
+    "pavgb      %%xmm7,%%xmm0                  \n"
+    "movdqu     0x10(%0,%4,1),%%xmm7           \n"
+    "pavgb      %%xmm7,%%xmm1                  \n"
+    "movdqu     0x20(%0,%4,1),%%xmm7           \n"
+    "pavgb      %%xmm7,%%xmm2                  \n"
+    "movdqu     0x30(%0,%4,1),%%xmm7           \n"
+    "pavgb      %%xmm7,%%xmm6                  \n"
+    "lea        0x40(%0),%0                    \n"
+    "movdqa     %%xmm0,%%xmm7                  \n"
+    "shufps     $0x88,%%xmm1,%%xmm0            \n"
+    "shufps     $0xdd,%%xmm1,%%xmm7            \n"
+    "pavgb      %%xmm7,%%xmm0                  \n"
+    "movdqa     %%xmm2,%%xmm7                  \n"
+    "shufps     $0x88,%%xmm6,%%xmm2            \n"
+    "shufps     $0xdd,%%xmm6,%%xmm7            \n"
+    "pavgb      %%xmm7,%%xmm2                  \n"
+    "movdqa     %%xmm0,%%xmm1                  \n"
+    "movdqa     %%xmm2,%%xmm6                  \n"
+    "pmaddubsw  %%xmm4,%%xmm0                  \n"
+    "pmaddubsw  %%xmm4,%%xmm2                  \n"
+    "pmaddubsw  %%xmm3,%%xmm1                  \n"
+    "pmaddubsw  %%xmm3,%%xmm6                  \n"
+    "phaddw     %%xmm2,%%xmm0                  \n"
+    "phaddw     %%xmm6,%%xmm1                  \n"
+    "paddw      %%xmm5,%%xmm0                  \n"
+    "paddw      %%xmm5,%%xmm1                  \n"
+    "psraw      $0x8,%%xmm0                    \n"
+    "psraw      $0x8,%%xmm1                    \n"
+    "packsswb   %%xmm1,%%xmm0                  \n"
+    "sub        $0x10,%3                       \n"
+    "movlps     %%xmm0,(%1)                    \n"
+    "movhps     %%xmm0,(%1,%2,1)               \n"
+    "lea        0x8(%1),%1                     \n"
+    "jg         1b                             \n"
+  : "+r"(src_argb0),  // %0
+    "+r"(dst_u),  // %1
+    "+r"(dst_v),  // %2
+    "+rm"(width)  // %3
+  : "r"(static_cast<intptr_t>(src_stride_argb))
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
+#endif
+  );
+}
+
 void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                           int width) {
   asm volatile (
@@ -3764,15 +3905,11 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
 #endif  // HAS_ARGBUNATTENUATEROW_SSE2

 #ifdef HAS_ARGBGRAYROW_SSSE3
-// Constant for ARGB color to gray scale.  0.11 * B + 0.59 * G + 0.30 * R
-CONST vec8 kARGBToGray = {
-  14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
-};
-
 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
   asm volatile (
     "movdqa     %3,%%xmm4                      \n"
+    "movdqa     %4,%%xmm5                      \n"
     "sub        %0,%1                          \n"

     // 8 pixel loop.
@@ -3783,6 +3920,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
     "pmaddubsw  %%xmm4,%%xmm0                  \n"
     "pmaddubsw  %%xmm4,%%xmm1                  \n"
    "phaddw     %%xmm1,%%xmm0                  \n"
+    "paddw      %%xmm5,%%xmm0                  \n"
     "psrlw      $0x7,%%xmm0                    \n"
     "packuswb   %%xmm0,%%xmm0                  \n"
     "movdqa     (%0),%%xmm2                    \n"
@@ -3805,10 +3943,11 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
   : "+r"(src_argb),   // %0
     "+r"(dst_argb),   // %1
     "+r"(width)       // %2
-  : "m"(kARGBToGray)  // %3
+  : "m"(kARGBToYJ),   // %3
+    "m"(kAddYJ64)     // %4
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 #endif
   );
 }
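A note on kAddUVJ128: adding 0x8080 (= 128 << 8 | 0x80) with paddw before the arithmetic shift folds the +128 chroma bias and the +0.5 rounding term into a single constant, mirroring the scalar RGBToUJ/RGBToVJ in row_common.cc. A minimal scalar model of the net effect (illustrative, not code from the commit):

#include <cstdint>

// Models paddw kAddUVJ128 followed by psraw 8 on one chroma sum.
// sum is the signed dot product, e.g. 127*b - 84*g - 43*r for U,
// which ranges over roughly -32385..32385 for 8-bit inputs.
static uint8_t BiasAndRound(int sum) {
  return (uint8_t)((sum + 0x8080) >> 8);  // == ((sum + 128) >> 8) + 128
}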
source/row_win.cc
@@ -27,7 +27,7 @@ static const vec8 kARGBToY = {

 // JPeg full range.
 static const vec8 kARGBToYJ = {
-  15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0, 15, 74, 38, 0
+  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
 };

 static const lvec8 kARGBToY_AVX = {
@@ -39,6 +39,10 @@ static const vec8 kARGBToU = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
 };

+static const vec8 kARGBToUJ = {
+  127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
+};
+
 // TODO(fbarchard): Rename kARGBToU_AVX to kARGBToU and use for SSSE3 version.
 static const lvec8 kARGBToU_AVX = {
   112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0,
@@ -49,6 +53,10 @@ static const vec8 kARGBToV = {
   -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
 };

+static const vec8 kARGBToVJ = {
+  -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
+};
+
 static const lvec8 kARGBToV_AVX = {
   -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
   -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0
@@ -124,6 +132,10 @@ static const uvec8 kAddUV128 = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
 };

+static const uvec16 kAddUVJ128 = {
+  0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
+};
+
 static const ulvec8 kAddUV128_AVX = {
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
   128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
@@ -1087,6 +1099,73 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }

+__declspec(naked) __declspec(align(16))
+void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                        uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToUJ
+    movdqa     xmm6, kARGBToVJ
+    movdqa     xmm5, kAddUVJ128
+    sub        edi, edx             // stride from u to v
+
+    align      16
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqa     xmm0, [eax]
+    movdqa     xmm1, [eax + 16]
+    movdqa     xmm2, [eax + 32]
+    movdqa     xmm3, [eax + 48]
+    pavgb      xmm0, [eax + esi]
+    pavgb      xmm1, [eax + esi + 16]
+    pavgb      xmm2, [eax + esi + 32]
+    pavgb      xmm3, [eax + esi + 48]
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    paddw      xmm0, xmm5  // +.5 rounding -> unsigned
+    paddw      xmm1, xmm5
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0       // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
 #ifdef HAS_ARGBTOUVROW_AVX2
 __declspec(naked) __declspec(align(32))
 void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
@@ -1223,6 +1302,77 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
   }
 }

+__declspec(naked) __declspec(align(16))
+void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
+                                  uint8* dst_u, uint8* dst_v, int width) {
+  __asm {
+    push       esi
+    push       edi
+    mov        eax, [esp + 8 + 4]   // src_argb
+    mov        esi, [esp + 8 + 8]   // src_stride_argb
+    mov        edx, [esp + 8 + 12]  // dst_u
+    mov        edi, [esp + 8 + 16]  // dst_v
+    mov        ecx, [esp + 8 + 20]  // pix
+    movdqa     xmm7, kARGBToUJ
+    movdqa     xmm6, kARGBToVJ
+    movdqa     xmm5, kAddUVJ128
+    sub        edi, edx             // stride from u to v
+
+    align      16
+ convertloop:
+    /* step 1 - subsample 16x2 argb pixels to 8x1 */
+    movdqu     xmm0, [eax]
+    movdqu     xmm1, [eax + 16]
+    movdqu     xmm2, [eax + 32]
+    movdqu     xmm3, [eax + 48]
+    movdqu     xmm4, [eax + esi]
+    pavgb      xmm0, xmm4
+    movdqu     xmm4, [eax + esi + 16]
+    pavgb      xmm1, xmm4
+    movdqu     xmm4, [eax + esi + 32]
+    pavgb      xmm2, xmm4
+    movdqu     xmm4, [eax + esi + 48]
+    pavgb      xmm3, xmm4
+    lea        eax,  [eax + 64]
+    movdqa     xmm4, xmm0
+    shufps     xmm0, xmm1, 0x88
+    shufps     xmm4, xmm1, 0xdd
+    pavgb      xmm0, xmm4
+    movdqa     xmm4, xmm2
+    shufps     xmm2, xmm3, 0x88
+    shufps     xmm4, xmm3, 0xdd
+    pavgb      xmm2, xmm4
+
+    // step 2 - convert to U and V
+    // from here down is very similar to Y code except
+    // instead of 16 different pixels, its 8 pixels of U and 8 of V
+    movdqa     xmm1, xmm0
+    movdqa     xmm3, xmm2
+    pmaddubsw  xmm0, xmm7  // U
+    pmaddubsw  xmm2, xmm7
+    pmaddubsw  xmm1, xmm6  // V
+    pmaddubsw  xmm3, xmm6
+    phaddw     xmm0, xmm2
+    phaddw     xmm1, xmm3
+    paddw      xmm0, xmm5  // +.5 rounding -> unsigned
+    paddw      xmm1, xmm5
+    psraw      xmm0, 8
+    psraw      xmm1, 8
+    packsswb   xmm0, xmm1
+
+    // step 3 - store 8 U and 8 V values
+    sub        ecx, 16
+    movlps     qword ptr [edx], xmm0       // U
+    movhps     qword ptr [edx + edi], xmm0 // V
+    lea        edx, [edx + 8]
+    jg         convertloop
+
+    pop        edi
+    pop        esi
+    ret
+  }
+}
+
 __declspec(naked) __declspec(align(16))
 void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
                           uint8* dst_u, uint8* dst_v, int width) {
@@ -4597,11 +4747,6 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
 #endif  // HAS_ARGBATTENUATEROW_AVX2

 #ifdef HAS_ARGBGRAYROW_SSSE3
-// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
-static const vec8 kARGBToGray = {
-  14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
-};
-
 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
 __declspec(naked) __declspec(align(16))
 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
@@ -4609,7 +4754,8 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
     mov        eax, [esp + 4]   /* src_argb */
     mov        edx, [esp + 8]   /* dst_argb */
     mov        ecx, [esp + 12]  /* width */
-    movdqa     xmm4, kARGBToGray
+    movdqa     xmm4, kARGBToYJ
+    movdqa     xmm5, kAddYJ64
     sub        edx, eax

     align      16
@@ -4619,6 +4765,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
     pmaddubsw  xmm0, xmm4
     pmaddubsw  xmm1, xmm4
     phaddw     xmm0, xmm1
+    paddw      xmm0, xmm5  // Add .5 for rounding.
     psrlw      xmm0, 7
     packuswb   xmm0, xmm0  // 8 G bytes
     movdqa     xmm2, [eax]  // A
unit_test/convert_test.cc
@@ -689,7 +689,11 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
                benchmark_width_, DIFF, _Opt, +, 0)

 TESTATOPLANAR(ARGB, 4, I420, 2, 2, 4)
+#ifdef __arm__
 TESTATOPLANAR(ARGB, 4, J420, 2, 2, 4)
+#else
+TESTATOPLANAR(ARGB, 4, J420, 2, 2, 0)
+#endif
 TESTATOPLANAR(BGRA, 4, I420, 2, 2, 4)
 TESTATOPLANAR(ABGR, 4, I420, 2, 2, 4)
 TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
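One nuance in the tolerances above: the J420 test keeps DIFF 4 on ARM but tightens to an exact match (DIFF 0) elsewhere. This is presumably because ARGBToUVJRow_NEON still subsamples in a different order than the C code, per the TODO(fbarchard) note in the row_neon.cc hunk, while the C and SSSE3 paths now agree bit for bit.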
unit_test/planar_test.cc
@@ -269,7 +269,6 @@ TEST_F(libyuvTest, TestARGBComputeCumulativeSum) {

 TEST_F(libyuvTest, TestARGBGray) {
   SIMD_ALIGNED(uint8 orig_pixels[256][4]);
-
   // Test blue
   orig_pixels[0][0] = 255u;
   orig_pixels[0][1] = 0u;
@@ -285,30 +284,47 @@ TEST_F(libyuvTest, TestARGBGray) {
   orig_pixels[2][1] = 0u;
   orig_pixels[2][2] = 255u;
   orig_pixels[2][3] = 255u;
+  // Test black
+  orig_pixels[3][0] = 0u;
+  orig_pixels[3][1] = 0u;
+  orig_pixels[3][2] = 0u;
+  orig_pixels[3][3] = 255u;
+  // Test white
+  orig_pixels[4][0] = 255u;
+  orig_pixels[4][1] = 255u;
+  orig_pixels[4][2] = 255u;
+  orig_pixels[4][3] = 255u;
   // Test color
-  orig_pixels[3][0] = 16u;
-  orig_pixels[3][1] = 64u;
-  orig_pixels[3][2] = 192u;
-  orig_pixels[3][3] = 224u;
+  orig_pixels[5][0] = 16u;
+  orig_pixels[5][1] = 64u;
+  orig_pixels[5][2] = 192u;
+  orig_pixels[5][3] = 224u;
   // Do 16 to test asm version.
   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
-  EXPECT_EQ(27u, orig_pixels[0][0]);
-  EXPECT_EQ(27u, orig_pixels[0][1]);
-  EXPECT_EQ(27u, orig_pixels[0][2]);
+  EXPECT_EQ(30u, orig_pixels[0][0]);
+  EXPECT_EQ(30u, orig_pixels[0][1]);
+  EXPECT_EQ(30u, orig_pixels[0][2]);
   EXPECT_EQ(128u, orig_pixels[0][3]);
-  EXPECT_EQ(151u, orig_pixels[1][0]);
-  EXPECT_EQ(151u, orig_pixels[1][1]);
-  EXPECT_EQ(151u, orig_pixels[1][2]);
+  EXPECT_EQ(149u, orig_pixels[1][0]);
+  EXPECT_EQ(149u, orig_pixels[1][1]);
+  EXPECT_EQ(149u, orig_pixels[1][2]);
   EXPECT_EQ(0u, orig_pixels[1][3]);
-  EXPECT_EQ(75u, orig_pixels[2][0]);
-  EXPECT_EQ(75u, orig_pixels[2][1]);
-  EXPECT_EQ(75u, orig_pixels[2][2]);
+  EXPECT_EQ(76u, orig_pixels[2][0]);
+  EXPECT_EQ(76u, orig_pixels[2][1]);
+  EXPECT_EQ(76u, orig_pixels[2][2]);
   EXPECT_EQ(255u, orig_pixels[2][3]);
-  EXPECT_EQ(96u, orig_pixels[3][0]);
-  EXPECT_EQ(96u, orig_pixels[3][1]);
-  EXPECT_EQ(96u, orig_pixels[3][2]);
-  EXPECT_EQ(224u, orig_pixels[3][3]);
+  EXPECT_EQ(0u, orig_pixels[3][0]);
+  EXPECT_EQ(0u, orig_pixels[3][1]);
+  EXPECT_EQ(0u, orig_pixels[3][2]);
+  EXPECT_EQ(255u, orig_pixels[3][3]);
+  EXPECT_EQ(255u, orig_pixels[4][0]);
+  EXPECT_EQ(255u, orig_pixels[4][1]);
+  EXPECT_EQ(255u, orig_pixels[4][2]);
+  EXPECT_EQ(255u, orig_pixels[4][3]);
+  EXPECT_EQ(96u, orig_pixels[5][0]);
+  EXPECT_EQ(96u, orig_pixels[5][1]);
+  EXPECT_EQ(96u, orig_pixels[5][2]);
+  EXPECT_EQ(224u, orig_pixels[5][3]);
   for (int i = 0; i < 256; ++i) {
     orig_pixels[i][0] = i;
     orig_pixels[i][1] = i / 2;
@@ -323,7 +339,6 @@ TEST_F(libyuvTest, TestARGBGray) {

 TEST_F(libyuvTest, TestARGBGrayTo) {
   SIMD_ALIGNED(uint8 orig_pixels[256][4]);
   SIMD_ALIGNED(uint8 gray_pixels[256][4]);
-
   // Test blue
   orig_pixels[0][0] = 255u;
   orig_pixels[0][1] = 0u;
@@ -339,30 +354,47 @@ TEST_F(libyuvTest, TestARGBGrayTo) {
   orig_pixels[2][1] = 0u;
   orig_pixels[2][2] = 255u;
   orig_pixels[2][3] = 255u;
+  // Test black
+  orig_pixels[3][0] = 0u;
+  orig_pixels[3][1] = 0u;
+  orig_pixels[3][2] = 0u;
+  orig_pixels[3][3] = 255u;
+  // Test white
+  orig_pixels[4][0] = 255u;
+  orig_pixels[4][1] = 255u;
+  orig_pixels[4][2] = 255u;
+  orig_pixels[4][3] = 255u;
   // Test color
-  orig_pixels[3][0] = 16u;
-  orig_pixels[3][1] = 64u;
-  orig_pixels[3][2] = 192u;
-  orig_pixels[3][3] = 224u;
+  orig_pixels[5][0] = 16u;
+  orig_pixels[5][1] = 64u;
+  orig_pixels[5][2] = 192u;
+  orig_pixels[5][3] = 224u;
   // Do 16 to test asm version.
   ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
-  EXPECT_EQ(27u, gray_pixels[0][0]);
-  EXPECT_EQ(27u, gray_pixels[0][1]);
-  EXPECT_EQ(27u, gray_pixels[0][2]);
+  EXPECT_EQ(30u, gray_pixels[0][0]);
+  EXPECT_EQ(30u, gray_pixels[0][1]);
+  EXPECT_EQ(30u, gray_pixels[0][2]);
   EXPECT_EQ(128u, gray_pixels[0][3]);
-  EXPECT_EQ(151u, gray_pixels[1][0]);
-  EXPECT_EQ(151u, gray_pixels[1][1]);
-  EXPECT_EQ(151u, gray_pixels[1][2]);
+  EXPECT_EQ(149u, gray_pixels[1][0]);
+  EXPECT_EQ(149u, gray_pixels[1][1]);
+  EXPECT_EQ(149u, gray_pixels[1][2]);
   EXPECT_EQ(0u, gray_pixels[1][3]);
-  EXPECT_EQ(75u, gray_pixels[2][0]);
-  EXPECT_EQ(75u, gray_pixels[2][1]);
-  EXPECT_EQ(75u, gray_pixels[2][2]);
+  EXPECT_EQ(76u, gray_pixels[2][0]);
+  EXPECT_EQ(76u, gray_pixels[2][1]);
+  EXPECT_EQ(76u, gray_pixels[2][2]);
   EXPECT_EQ(255u, gray_pixels[2][3]);
-  EXPECT_EQ(96u, gray_pixels[3][0]);
-  EXPECT_EQ(96u, gray_pixels[3][1]);
-  EXPECT_EQ(96u, gray_pixels[3][2]);
-  EXPECT_EQ(224u, gray_pixels[3][3]);
+  EXPECT_EQ(0u, gray_pixels[3][0]);
+  EXPECT_EQ(0u, gray_pixels[3][1]);
+  EXPECT_EQ(0u, gray_pixels[3][2]);
+  EXPECT_EQ(255u, gray_pixels[3][3]);
+  EXPECT_EQ(255u, gray_pixels[4][0]);
+  EXPECT_EQ(255u, gray_pixels[4][1]);
+  EXPECT_EQ(255u, gray_pixels[4][2]);
+  EXPECT_EQ(255u, gray_pixels[4][3]);
+  EXPECT_EQ(96u, gray_pixels[5][0]);
+  EXPECT_EQ(96u, gray_pixels[5][1]);
+  EXPECT_EQ(96u, gray_pixels[5][2]);
+  EXPECT_EQ(224u, gray_pixels[5][3]);
   for (int i = 0; i < 256; ++i) {
     orig_pixels[i][0] = i;
     orig_pixels[i][1] = i / 2;
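As a cross-check (not in the commit), the new expected values follow directly from RGBToYJ as defined in the row_common.cc hunk. A sketch that reproduces all six test pixels:

#include <cassert>

static int RGBToYJ(int r, int g, int b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}

int main() {
  assert(RGBToYJ(0, 0, 255) == 30);       // blue
  assert(RGBToYJ(0, 255, 0) == 149);      // green
  assert(RGBToYJ(255, 0, 0) == 76);       // red
  assert(RGBToYJ(0, 0, 0) == 0);          // black: exact thanks to rounding
  assert(RGBToYJ(255, 255, 255) == 255);  // white: exact because sum == 128
  assert(RGBToYJ(192, 64, 16) == 96);     // color pixel: B=16, G=64, R=192
  return 0;
}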