mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Recompute the JPEG coefficients, normalized to 128. Apply them to the ARGBGray function, which now reuses the YJ function, coefficients and rounding.
BUG=201 TESTED=Gray unittest improved Review URL: https://webrtc-codereview.appspot.com/1269006 git-svn-id: http://libyuv.googlecode.com/svn/trunk@629 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
6a352141ef
commit
050b39a5cb
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 628
|
||||
Version: 629
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -54,6 +54,7 @@ extern "C" {
|
||||
#define HAS_ARGBTOUV422ROW_SSSE3
|
||||
#define HAS_ARGBTOUV444ROW_SSSE3
|
||||
#define HAS_ARGBTOUVROW_SSSE3
|
||||
#define HAS_ARGBTOUVJROW_SSSE3
|
||||
#define HAS_ARGBTOYROW_SSSE3
|
||||
#define HAS_ARGBTOYJROW_SSSE3
|
||||
#define HAS_BGRATOUVROW_SSSE3
|
||||
@ -203,6 +204,7 @@ extern "C" {
|
||||
#define HAS_ARGBTOUV422ROW_NEON
|
||||
#define HAS_ARGBTOUV444ROW_NEON
|
||||
#define HAS_ARGBTOUVROW_NEON
|
||||
#define HAS_ARGBTOUVJROW_NEON
|
||||
#define HAS_ARGBTOYROW_NEON
|
||||
#define HAS_ARGBTOYJROW_NEON
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
@ -423,6 +425,8 @@ void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
@ -481,6 +485,8 @@ void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
@ -489,6 +495,8 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
@ -497,6 +505,8 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
|
||||
@ -511,6 +521,8 @@ void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int pix);
|
||||
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
@ -531,6 +543,8 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width);
|
||||
void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 628
|
||||
#define LIBYUV_VERSION 629
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -994,19 +994,19 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
|
||||
ARGBToYJRow_C;
|
||||
#if defined(HAS_ARGBTOYJROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
|
||||
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
|
||||
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
|
||||
if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
|
||||
ARGBToYJRow = ARGBToYJRow_SSSE3;
|
||||
}
|
||||
@ -1021,16 +1021,16 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
ARGBToYJRow = ARGBToYJRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
ARGBToUVJRow = ARGBToUVJRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
|
||||
ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
|
||||
ARGBToYJRow(src_argb, dst_yj, width);
|
||||
ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
|
||||
src_argb += src_stride_argb * 2;
|
||||
@ -1039,7 +1039,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
|
||||
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
|
||||
ARGBToYJRow(src_argb, dst_yj, width);
|
||||
}
|
||||
return 0;
|
||||
|
||||
@ -293,6 +293,8 @@ UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVROW_SSSE3
|
||||
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
|
||||
UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
|
||||
4, 15)
|
||||
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
|
||||
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
|
||||
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
|
||||
@ -301,6 +303,7 @@ UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVROW_NEON
|
||||
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
|
||||
UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
|
||||
UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
|
||||
UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
|
||||
UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
|
||||
|
||||
@ -256,25 +256,44 @@ MAKEROWY(RGB24, 2, 1, 0, 3)
|
||||
MAKEROWY(RAW, 0, 1, 2, 3)
|
||||
#undef MAKEROWY
|
||||
|
||||
// BT.601 mpeg range
|
||||
// JPeg uses a variation on BT.601-1 full range
|
||||
// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
|
||||
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
|
||||
// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
|
||||
// BT.601 Mpeg range uses:
|
||||
// b 0.1016 * 255 = 25.908 = 25
|
||||
// g 0.5078 * 255 = 129.489 = 129
|
||||
// r 0.2578 * 255 = 65.739 = 66
|
||||
// = 0.8672. 1/.8672 = 1.1531
|
||||
// BT.601 full range 8 bit (not used)
|
||||
// b 0.1016 * 1.1531 = 0.1172 * 255 = 29.886 = 30
|
||||
// g 0.5078 * 1.1531 = 0.5855 * 255 = 149.3025 = 149
|
||||
// r 0.2578 * 1.1531 = 0.2973 * 255 = 75.8115 = 76
|
||||
// 30 + 149 + 76 = 255
|
||||
// BT.601 full range 7 bit
|
||||
// b 0.1172 * 127 = 14.8844 = 15
|
||||
// g 0.5855 * 127 = 74.35855 = 74
|
||||
// r 0.2973 * 127 = 37.7571 = 38
|
||||
// JPeg 8 bit Y (not used):
|
||||
// b 0.11400 * 256 = 29.184 = 29
|
||||
// g 0.58700 * 256 = 150.272 = 150
|
||||
// r 0.29900 * 256 = 76.544 = 77
|
||||
// JPeg 7 bit Y:
|
||||
// b 0.11400 * 128 = 14.592 = 15
|
||||
// g 0.58700 * 128 = 75.136 = 75
|
||||
// r 0.29900 * 128 = 38.272 = 38
|
||||
// JPeg 8 bit U:
|
||||
// b 0.50000 * 255 = 127.5 = 127
|
||||
// g -0.33126 * 255 = -84.4713 = -84
|
||||
// r -0.16874 * 255 = -43.0287 = -43
|
||||
// JPeg 8 bit V:
|
||||
// b -0.08131 * 255 = -20.73405 = -20
|
||||
// g -0.41869 * 255 = -106.76595 = -107
|
||||
// r 0.50000 * 255 = 127.5 = 127
|
||||
|
||||
// Converts one RGB pixel to JPEG (full range) luma.
// Coefficients are 0.299 / 0.587 / 0.114 scaled by 128 (7 bit fixed point):
// 38 + 75 + 15 = 128, so white (255, 255, 255) maps to exactly 255.
// The + 64 term rounds to nearest before the shift right by 7.
// The stale return using 74 for green (sum 127) has been removed; it ran
// first and made the corrected return unreachable.
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
|
||||
|
||||
// Converts one RGB pixel to JPEG (full range) U.
// Blue is weighted +127, green -84 and red -43 (8 bit fixed point); 0x8080
// supplies the 128 centre (0x8000) plus 0.5 rounding (0x80) before the
// shift right by 8.
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
|
||||
// Converts one RGB pixel to JPEG (full range) V.
// Red is weighted +127, green -107 and blue -20 (8 bit fixed point); 0x8080
// supplies the 128 centre (0x8000) plus 0.5 rounding (0x80) before the
// shift right by 8.
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
|
||||
|
||||
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||
void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
|
||||
for (int x = 0; x < width; ++x) { \
|
||||
@ -283,6 +302,31 @@ void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
|
||||
dst_y += 1; \
|
||||
} \
|
||||
} \
|
||||
void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
|
||||
uint8* dst_u, uint8* dst_v, int width) { \
|
||||
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
||||
for (int x = 0; x < width - 1; x += 2) { \
|
||||
uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
|
||||
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
|
||||
uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
|
||||
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
|
||||
uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
|
||||
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
|
||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||
src_rgb0 += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
|
||||
uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
|
||||
uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
|
||||
dst_u[0] = RGBToUJ(ar, ag, ab); \
|
||||
dst_v[0] = RGBToVJ(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
|
||||
MAKEROWYJ(ARGB, 2, 1, 0, 4)
|
||||
#undef MAKEROWYJ
|
||||
@ -537,16 +581,9 @@ void ARGBToUV411Row_C(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// http://en.wikipedia.org/wiki/Grayscale.
|
||||
// 0.11 * B + 0.59 * G + 0.30 * R
|
||||
// Coefficients rounded to multiple of 2 for consistency with SSSE3 version.
|
||||
static __inline int RGBToGray(uint8 r, uint8 g, uint8 b) {
|
||||
return (28 * b + 152 * g + 76 * r) >> 8;
|
||||
}
|
||||
|
||||
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
uint8 y = RGBToGray(src_argb[2], src_argb[1], src_argb[0]);
|
||||
uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
|
||||
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
|
||||
dst_argb[3] = src_argb[3];
|
||||
dst_argb += 4;
|
||||
|
||||
@ -1338,9 +1338,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
|
||||
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #15 \n" // B * 0.1172 coefficient
|
||||
"vmov.u8 d25, #74 \n" // G * 0.5855 coefficient
|
||||
"vmov.u8 d26, #38 \n" // R * 0.2973 coefficient
|
||||
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
|
||||
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
|
||||
"vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
@ -1348,7 +1348,7 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
|
||||
"vmull.u8 q2, d0, d24 \n" // B
|
||||
"vmlal.u8 q2, d1, d25 \n" // G
|
||||
"vmlal.u8 q2, d2, d26 \n" // R
|
||||
"vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y
|
||||
"vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y
|
||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
@ -1547,6 +1547,45 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Subsample match C code.
|
||||
// Converts two rows of ARGB to one row of JPEG range U and one row of V (NEON).
// Each loop iteration loads 16 ARGB pixels from src_argb and 16 from
// src_argb + src_stride_argb, forms per-channel sums over each 2x2 pixel
// group, and stores 8 U bytes to dst_u and 8 V bytes to dst_v.
// pix is decremented by 16 per iteration and the loop repeats while it is > 0.
// RGBTOUV is a macro defined outside this block; presumably it applies the
// q10-q15 constants set up below to the channel sums - TODO confirm.
// NOTE(review): the coefficients are written as "N / 4", presumably because
// the inputs are sums of 4 pixels; if the assembler evaluates this as integer
// division the remainder is dropped - confirm.
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // %1 = second row (src_argb + src_stride)
|
||||
"vmov.s16 q10, #127 / 4 \n" // UB / VR 0.500 coefficient
|
||||
"vmov.s16 q11, #84 / 4 \n" // UG -0.33126 coefficient
|
||||
"vmov.s16 q12, #43 / 4 \n" // UR -0.16874 coefficient
|
||||
"vmov.s16 q13, #20 / 4 \n" // VB -0.08131 coefficient
|
||||
"vmov.s16 q14, #107 / 4 \n" // VG -0.41869 coefficient
|
||||
"vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
|
||||
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
|
||||
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
|
||||
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
|
||||
"vpadal.u8 q0, q4 \n" // B: add pair sums of second row.
|
||||
"vpadal.u8 q1, q5 \n" // G: add pair sums of second row.
|
||||
"vpadal.u8 q2, q6 \n" // R: add pair sums of second row.
|
||||
"subs %4, %4, #16 \n" // 16 pixels of width per loop (32 loaded over 2 rows).
|
||||
RGBTOUV(q0, q1, q2)
|
||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(src_stride_argb), // %1
|
||||
"+r"(dst_u), // %2
|
||||
"+r"(dst_v), // %3
|
||||
"+r"(pix) // %4
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
||||
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int pix) {
|
||||
asm volatile (
|
||||
@ -2365,13 +2404,13 @@ void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
|
||||
}
|
||||
|
||||
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
|
||||
// Similar to ARGBToY but different constants, no round and stores ARGB.
|
||||
// C code is (28 * b + 152 * g + 76 * r) >> 8;
|
||||
// Similar to ARGBToYJ but stores ARGB.
|
||||
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
|
||||
void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #14 \n" // B * 0.1016 coefficient
|
||||
"vmov.u8 d25, #76 \n" // G * 0.5078 coefficient
|
||||
"vmov.u8 d26, #38 \n" // R * 0.2578 coefficient
|
||||
"vmov.u8 d24, #15 \n" // B * 0.11400 coefficient
|
||||
"vmov.u8 d25, #75 \n" // G * 0.58700 coefficient
|
||||
"vmov.u8 d26, #38 \n" // R * 0.29900 coefficient
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
|
||||
@ -2379,7 +2418,7 @@ void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
"vmull.u8 q2, d0, d24 \n" // B
|
||||
"vmlal.u8 q2, d1, d25 \n" // G
|
||||
"vmlal.u8 q2, d2, d26 \n" // R
|
||||
"vqshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit B
|
||||
"vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B
|
||||
"vmov d1, d0 \n" // G
|
||||
"vmov d2, d0 \n" // R
|
||||
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels.
|
||||
|
||||
@ -37,17 +37,25 @@ CONST vec8 kARGBToY = {
|
||||
|
||||
// JPeg full range.
|
||||
// JPEG full range luma coefficients in 7 bit fixed point, repeated for four
// pixels in B, G, R, A byte order: 15 (blue) + 75 (green) + 38 (red) = 128.
// The stale 15, 74, 38 row (sum 127) has been removed; it sat next to the
// current row with no separator, which is not a valid initializer.
CONST vec8 kARGBToYJ = {
  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
};
|
||||
|
||||
CONST vec8 kARGBToU = {
|
||||
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
|
||||
};
|
||||
|
||||
// JPEG full range U coefficients, repeated for four pixels in B, G, R, A byte
// order: +127 blue, -84 green, -43 red, 0 alpha (same weights as RGBToUJ).
CONST vec8 kARGBToUJ = {
|
||||
127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
|
||||
};
|
||||
|
||||
CONST vec8 kARGBToV = {
|
||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
||||
};
|
||||
|
||||
// JPEG full range V coefficients, repeated for four pixels in B, G, R, A byte
// order: -20 blue, -107 green, +127 red, 0 alpha (same weights as RGBToVJ).
CONST vec8 kARGBToVJ = {
|
||||
-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
|
||||
};
|
||||
|
||||
// Constants for BGRA
|
||||
CONST vec8 kBGRAToY = {
|
||||
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
|
||||
@ -100,6 +108,10 @@ CONST uvec8 kAddUV128 = {
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
|
||||
};
|
||||
|
||||
// Per 16 bit lane constant 0x8080 added to the JPEG U/V sums before the shift
// right by 8: 0x8000 is the 128 centre and 0x80 is 0.5 for rounding.
CONST uvec16 kAddUVJ128 = {
|
||||
0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
|
||||
};
|
||||
|
||||
// Shuffle table for converting RGB24 to ARGB.
|
||||
CONST uvec8 kShuffleMaskRGB24ToARGB = {
|
||||
0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
|
||||
@ -830,6 +842,69 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3.
|
||||
// Converts two rows of ARGB to one row of JPEG range U and one row of V (SSSE3).
// Each loop iteration reads 16 ARGB pixels (64 bytes) from src_argb0 and from
// src_argb0 + src_stride_argb, averages them down to 8 pixels, and writes
// 8 U bytes to dst_u and 8 V bytes to dst_v. width is decremented by 16 per
// iteration and the loop repeats while it is > 0.
// The source rows are accessed with movdqa and pavgb memory operands, so
// both rows must be 16 byte aligned.
// NOTE(review): xmm3, xmm4 and xmm5 are loaded by the first asm statement and
// used by the second, but appear in neither clobber list - confirm the
// compiler cannot reuse them in between.
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
// Load constants: xmm4 = U coefficients, xmm3 = V coefficients, xmm5 = bias.
asm volatile (
|
||||
"movdqa %0,%%xmm4 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kARGBToUJ), // %0
|
||||
"m"(kARGBToVJ), // %1
|
||||
"m"(kAddUVJ128) // %2
|
||||
);
|
||||
asm volatile (
|
||||
// %2 becomes the offset from dst_u to dst_v.
"sub %1,%2 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
// Load 16 pixels of the first row and average with the second row (%4).
"movdqa (%0),%%xmm0 \n"
|
||||
"movdqa 0x10(%0),%%xmm1 \n"
|
||||
"movdqa 0x20(%0),%%xmm2 \n"
|
||||
"movdqa 0x30(%0),%%xmm6 \n"
|
||||
"pavgb (%0,%4,1),%%xmm0 \n"
|
||||
"pavgb 0x10(%0,%4,1),%%xmm1 \n"
|
||||
"pavgb 0x20(%0,%4,1),%%xmm2 \n"
|
||||
"pavgb 0x30(%0,%4,1),%%xmm6 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
// Split even and odd pixels with shufps, then average them horizontally.
"movdqa %%xmm0,%%xmm7 \n"
|
||||
"shufps $0x88,%%xmm1,%%xmm0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm0 \n"
|
||||
"movdqa %%xmm2,%%xmm7 \n"
|
||||
"shufps $0x88,%%xmm6,%%xmm2 \n"
|
||||
"shufps $0xdd,%%xmm6,%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm2 \n"
|
||||
// Multiply-add by the U (xmm4) and V (xmm3) coefficients and sum per pixel.
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm6 \n"
|
||||
"phaddw %%xmm2,%%xmm0 \n"
|
||||
"phaddw %%xmm6,%%xmm1 \n"
|
||||
// Add the 0x8080 bias and rounding term, shift down by 8 and pack to bytes.
"paddw %%xmm5,%%xmm0 \n"
|
||||
"paddw %%xmm5,%%xmm1 \n"
|
||||
"psraw $0x8,%%xmm0 \n"
|
||||
"psraw $0x8,%%xmm1 \n"
|
||||
"packsswb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%3 \n"
|
||||
// Low 8 bytes are U (to dst_u), high 8 bytes are V (to dst_u + offset).
"movlps %%xmm0,(%1) \n"
|
||||
"movhps %%xmm0,(%1,%2,1) \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb0), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+rm"(width) // %3
|
||||
: "r"(static_cast<intptr_t>(src_stride_argb))
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
asm volatile (
|
||||
@ -895,6 +970,72 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// Converts two rows of ARGB to one row of JPEG range U and one row of V
// (SSSE3), reading both source rows with unaligned loads (movdqu).
// Each loop iteration reads 16 ARGB pixels (64 bytes) from src_argb0 and from
// src_argb0 + src_stride_argb, averages them down to 8 pixels, and writes
// 8 U bytes to dst_u and 8 V bytes to dst_v. width is decremented by 16 per
// iteration and the loop repeats while it is > 0.
// NOTE(review): xmm3, xmm4 and xmm5 are loaded by the first asm statement and
// used by the second, but appear in neither clobber list - confirm the
// compiler cannot reuse them in between.
void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
// Load constants: xmm4 = U coefficients, xmm3 = V coefficients, xmm5 = bias.
asm volatile (
|
||||
"movdqa %0,%%xmm4 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kARGBToUJ), // %0
|
||||
"m"(kARGBToVJ), // %1
|
||||
"m"(kAddUVJ128) // %2
|
||||
);
|
||||
asm volatile (
|
||||
// %2 becomes the offset from dst_u to dst_v.
"sub %1,%2 \n"
|
||||
".p2align 4 \n"
|
||||
"1: \n"
|
||||
// Load 16 pixels of the first row.
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu 0x10(%0),%%xmm1 \n"
|
||||
"movdqu 0x20(%0),%%xmm2 \n"
|
||||
"movdqu 0x30(%0),%%xmm6 \n"
|
||||
// Load the second row (%4 = stride) through xmm7 and average vertically.
"movdqu (%0,%4,1),%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm0 \n"
|
||||
"movdqu 0x10(%0,%4,1),%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm1 \n"
|
||||
"movdqu 0x20(%0,%4,1),%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm2 \n"
|
||||
"movdqu 0x30(%0,%4,1),%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm6 \n"
|
||||
"lea 0x40(%0),%0 \n"
|
||||
// Split even and odd pixels with shufps, then average them horizontally.
"movdqa %%xmm0,%%xmm7 \n"
|
||||
"shufps $0x88,%%xmm1,%%xmm0 \n"
|
||||
"shufps $0xdd,%%xmm1,%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm0 \n"
|
||||
"movdqa %%xmm2,%%xmm7 \n"
|
||||
"shufps $0x88,%%xmm6,%%xmm2 \n"
|
||||
"shufps $0xdd,%%xmm6,%%xmm7 \n"
|
||||
"pavgb %%xmm7,%%xmm2 \n"
|
||||
// Multiply-add by the U (xmm4) and V (xmm3) coefficients and sum per pixel.
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm2 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||
"pmaddubsw %%xmm3,%%xmm6 \n"
|
||||
"phaddw %%xmm2,%%xmm0 \n"
|
||||
"phaddw %%xmm6,%%xmm1 \n"
|
||||
// Add the 0x8080 bias and rounding term, shift down by 8 and pack to bytes.
"paddw %%xmm5,%%xmm0 \n"
|
||||
"paddw %%xmm5,%%xmm1 \n"
|
||||
"psraw $0x8,%%xmm0 \n"
|
||||
"psraw $0x8,%%xmm1 \n"
|
||||
"packsswb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%3 \n"
|
||||
// Low 8 bytes are U (to dst_u), high 8 bytes are V (to dst_u + offset).
"movlps %%xmm0,(%1) \n"
|
||||
"movhps %%xmm0,(%1,%2,1) \n"
|
||||
"lea 0x8(%1),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_argb0), // %0
|
||||
"+r"(dst_u), // %1
|
||||
"+r"(dst_v), // %2
|
||||
"+rm"(width) // %3
|
||||
: "r"(static_cast<intptr_t>(src_stride_argb))
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
@ -3764,15 +3905,11 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||
#endif // HAS_ARGBUNATTENUATEROW_SSE2
|
||||
|
||||
#ifdef HAS_ARGBGRAYROW_SSSE3
|
||||
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
|
||||
CONST vec8 kARGBToGray = {
|
||||
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
|
||||
};
|
||||
|
||||
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
|
||||
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"sub %0,%1 \n"
|
||||
|
||||
// 8 pixel loop.
|
||||
@ -3783,6 +3920,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
"pmaddubsw %%xmm4,%%xmm0 \n"
|
||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||
"phaddw %%xmm1,%%xmm0 \n"
|
||||
"paddw %%xmm5,%%xmm0 \n"
|
||||
"psrlw $0x7,%%xmm0 \n"
|
||||
"packuswb %%xmm0,%%xmm0 \n"
|
||||
"movdqa (%0),%%xmm2 \n"
|
||||
@ -3805,10 +3943,11 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kARGBToGray) // %3
|
||||
: "m"(kARGBToYJ), // %3
|
||||
"m"(kAddYJ64) // %4
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ static const vec8 kARGBToY = {
|
||||
|
||||
// JPeg full range.
|
||||
// JPEG full range luma coefficients in 7 bit fixed point, repeated for four
// pixels in B, G, R, A byte order: 15 (blue) + 75 (green) + 38 (red) = 128.
// The stale 15, 74, 38 row (sum 127) has been removed; it sat next to the
// current row with no separator, which is not a valid initializer.
static const vec8 kARGBToYJ = {
  15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0
};
|
||||
|
||||
static const lvec8 kARGBToY_AVX = {
|
||||
@ -39,6 +39,10 @@ static const vec8 kARGBToU = {
|
||||
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
|
||||
};
|
||||
|
||||
// JPEG full range U coefficients, repeated for four pixels in B, G, R, A byte
// order: +127 blue, -84 green, -43 red, 0 alpha.
static const vec8 kARGBToUJ = {
|
||||
127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0
|
||||
};
|
||||
|
||||
// TODO(fbarchard): Rename kARGBToU_AVX to kARGBToU and use for SSSE3 version.
|
||||
static const lvec8 kARGBToU_AVX = {
|
||||
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0,
|
||||
@ -49,6 +53,10 @@ static const vec8 kARGBToV = {
|
||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
||||
};
|
||||
|
||||
// JPEG full range V coefficients, repeated for four pixels in B, G, R, A byte
// order: -20 blue, -107 green, +127 red, 0 alpha.
static const vec8 kARGBToVJ = {
|
||||
-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0
|
||||
};
|
||||
|
||||
static const lvec8 kARGBToV_AVX = {
|
||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
|
||||
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0
|
||||
@ -124,6 +132,10 @@ static const uvec8 kAddUV128 = {
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
|
||||
};
|
||||
|
||||
// Per 16 bit lane constant 0x8080 added to the JPEG U/V sums before the shift
// right by 8: 0x8000 is the 128 centre and 0x80 is 0.5 for rounding.
static const uvec16 kAddUVJ128 = {
|
||||
0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u
|
||||
};
|
||||
|
||||
static const ulvec8 kAddUV128_AVX = {
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
||||
@ -1087,6 +1099,73 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Converts two rows of ARGB to one row of JPEG range U and one row of V
// (SSSE3, MSVC inline assembly, naked function).
// Each loop iteration reads 16 ARGB pixels (64 bytes) from src_argb0 and from
// src_argb0 + src_stride_argb, averages them down to 8 pixels, and writes
// 8 U bytes to dst_u and 8 V bytes to dst_v. The width counter is decremented
// by 16 per iteration and the loop repeats while it is > 0.
// The source rows are accessed with movdqa and pavgb memory operands, so
// both rows must be 16 byte aligned.
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
// Arguments are read at esp + 8 + N because esi and edi were just pushed.
mov eax, [esp + 8 + 4] // src_argb
|
||||
mov esi, [esp + 8 + 8] // src_stride_argb
|
||||
mov edx, [esp + 8 + 12] // dst_u
|
||||
mov edi, [esp + 8 + 16] // dst_v
|
||||
mov ecx, [esp + 8 + 20] // pix
|
||||
movdqa xmm7, kARGBToUJ
|
||||
movdqa xmm6, kARGBToVJ
|
||||
movdqa xmm5, kAddUVJ128
|
||||
sub edi, edx // stride from u to v
|
||||
|
|
||||
align 16
|
||||
convertloop:
|
||||
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
movdqa xmm2, [eax + 32]
|
||||
movdqa xmm3, [eax + 48]
|
||||
pavgb xmm0, [eax + esi]
|
||||
pavgb xmm1, [eax + esi + 16]
|
||||
pavgb xmm2, [eax + esi + 32]
|
||||
pavgb xmm3, [eax + esi + 48]
|
||||
lea eax, [eax + 64]
|
||||
// Split even and odd pixels with shufps, then average them horizontally.
movdqa xmm4, xmm0
|
||||
shufps xmm0, xmm1, 0x88
|
||||
shufps xmm4, xmm1, 0xdd
|
||||
pavgb xmm0, xmm4
|
||||
movdqa xmm4, xmm2
|
||||
shufps xmm2, xmm3, 0x88
|
||||
shufps xmm4, xmm3, 0xdd
|
||||
pavgb xmm2, xmm4
|
||||
|
|
||||
// step 2 - convert to U and V
|
||||
// from here down is very similar to Y code except
|
||||
// instead of 16 different pixels, it's 8 pixels of U and 8 of V
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm3, xmm2
|
||||
pmaddubsw xmm0, xmm7 // U
|
||||
pmaddubsw xmm2, xmm7
|
||||
pmaddubsw xmm1, xmm6 // V
|
||||
pmaddubsw xmm3, xmm6
|
||||
phaddw xmm0, xmm2
|
||||
phaddw xmm1, xmm3
|
||||
paddw xmm0, xmm5 // +.5 rounding -> unsigned
|
||||
paddw xmm1, xmm5
|
||||
psraw xmm0, 8
|
||||
psraw xmm1, 8
|
||||
packsswb xmm0, xmm1
|
||||
|
|
||||
// step 3 - store 8 U and 8 V values
|
||||
sub ecx, 16
|
||||
movlps qword ptr [edx], xmm0 // U
|
||||
movhps qword ptr [edx + edi], xmm0 // V
|
||||
lea edx, [edx + 8]
|
||||
jg convertloop
|
||||
|
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||
__declspec(naked) __declspec(align(32))
|
||||
void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
|
||||
@ -1223,6 +1302,77 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Converts two rows of ARGB to one row of JPEG range U and one row of V
// (SSSE3, MSVC inline assembly, naked function), reading both source rows
// with unaligned loads (movdqu).
// Each loop iteration reads 16 ARGB pixels (64 bytes) from src_argb0 and from
// src_argb0 + src_stride_argb, averages them down to 8 pixels, and writes
// 8 U bytes to dst_u and 8 V bytes to dst_v. The width counter is decremented
// by 16 per iteration and the loop repeats while it is > 0.
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
// Arguments are read at esp + 8 + N because esi and edi were just pushed.
mov eax, [esp + 8 + 4] // src_argb
|
||||
mov esi, [esp + 8 + 8] // src_stride_argb
|
||||
mov edx, [esp + 8 + 12] // dst_u
|
||||
mov edi, [esp + 8 + 16] // dst_v
|
||||
mov ecx, [esp + 8 + 20] // pix
|
||||
movdqa xmm7, kARGBToUJ
|
||||
movdqa xmm6, kARGBToVJ
|
||||
movdqa xmm5, kAddUVJ128
|
||||
sub edi, edx // stride from u to v
|
||||
|
|
||||
align 16
|
||||
convertloop:
|
||||
/* step 1 - subsample 16x2 argb pixels to 8x1 */
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
movdqu xmm2, [eax + 32]
|
||||
movdqu xmm3, [eax + 48]
|
||||
// Second row is loaded through xmm4 and averaged vertically.
movdqu xmm4, [eax + esi]
|
||||
pavgb xmm0, xmm4
|
||||
movdqu xmm4, [eax + esi + 16]
|
||||
pavgb xmm1, xmm4
|
||||
movdqu xmm4, [eax + esi + 32]
|
||||
pavgb xmm2, xmm4
|
||||
movdqu xmm4, [eax + esi + 48]
|
||||
pavgb xmm3, xmm4
|
||||
lea eax, [eax + 64]
|
||||
// Split even and odd pixels with shufps, then average them horizontally.
movdqa xmm4, xmm0
|
||||
shufps xmm0, xmm1, 0x88
|
||||
shufps xmm4, xmm1, 0xdd
|
||||
pavgb xmm0, xmm4
|
||||
movdqa xmm4, xmm2
|
||||
shufps xmm2, xmm3, 0x88
|
||||
shufps xmm4, xmm3, 0xdd
|
||||
pavgb xmm2, xmm4
|
||||
|
|
||||
// step 2 - convert to U and V
|
||||
// from here down is very similar to Y code except
|
||||
// instead of 16 different pixels, it's 8 pixels of U and 8 of V
|
||||
movdqa xmm1, xmm0
|
||||
movdqa xmm3, xmm2
|
||||
pmaddubsw xmm0, xmm7 // U
|
||||
pmaddubsw xmm2, xmm7
|
||||
pmaddubsw xmm1, xmm6 // V
|
||||
pmaddubsw xmm3, xmm6
|
||||
phaddw xmm0, xmm2
|
||||
phaddw xmm1, xmm3
|
||||
paddw xmm0, xmm5 // +.5 rounding -> unsigned
|
||||
paddw xmm1, xmm5
|
||||
psraw xmm0, 8
|
||||
psraw xmm1, 8
|
||||
packsswb xmm0, xmm1
|
||||
|
|
||||
// step 3 - store 8 U and 8 V values
|
||||
sub ecx, 16
|
||||
movlps qword ptr [edx], xmm0 // U
|
||||
movhps qword ptr [edx + edi], xmm0 // V
|
||||
lea edx, [edx + 8]
|
||||
jg convertloop
|
||||
|
|
||||
pop edi
|
||||
pop esi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
@ -4597,11 +4747,6 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
|
||||
#endif // HAS_ARGBATTENUATEROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBGRAYROW_SSSE3
|
||||
// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
|
||||
static const vec8 kARGBToGray = {
|
||||
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
|
||||
};
|
||||
|
||||
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
@ -4609,7 +4754,8 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
mov eax, [esp + 4] /* src_argb */
|
||||
mov edx, [esp + 8] /* dst_argb */
|
||||
mov ecx, [esp + 12] /* width */
|
||||
movdqa xmm4, kARGBToGray
|
||||
movdqa xmm4, kARGBToYJ
|
||||
movdqa xmm5, kAddYJ64
|
||||
sub edx, eax
|
||||
|
||||
align 16
|
||||
@ -4619,6 +4765,7 @@ void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
pmaddubsw xmm0, xmm4
|
||||
pmaddubsw xmm1, xmm4
|
||||
phaddw xmm0, xmm1
|
||||
paddw xmm0, xmm5 // Add .5 for rounding.
|
||||
psrlw xmm0, 7
|
||||
packuswb xmm0, xmm0 // 8 G bytes
|
||||
movdqa xmm2, [eax] // A
|
||||
|
||||
@ -689,7 +689,11 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
|
||||
benchmark_width_, DIFF, _Opt, +, 0)
|
||||
|
||||
TESTATOPLANAR(ARGB, 4, I420, 2, 2, 4)
|
||||
#ifdef __arm__
|
||||
TESTATOPLANAR(ARGB, 4, J420, 2, 2, 4)
|
||||
#else
|
||||
TESTATOPLANAR(ARGB, 4, J420, 2, 2, 0)
|
||||
#endif
|
||||
TESTATOPLANAR(BGRA, 4, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ABGR, 4, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
|
||||
|
||||
@ -269,7 +269,6 @@ TEST_F(libyuvTest, TestARGBComputeCumulativeSum) {
|
||||
|
||||
TEST_F(libyuvTest, TestARGBGray) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
|
||||
|
||||
// Test blue
|
||||
orig_pixels[0][0] = 255u;
|
||||
orig_pixels[0][1] = 0u;
|
||||
@ -285,30 +284,47 @@ TEST_F(libyuvTest, TestARGBGray) {
|
||||
orig_pixels[2][1] = 0u;
|
||||
orig_pixels[2][2] = 255u;
|
||||
orig_pixels[2][3] = 255u;
|
||||
// Test black
|
||||
orig_pixels[3][0] = 0u;
|
||||
orig_pixels[3][1] = 0u;
|
||||
orig_pixels[3][2] = 0u;
|
||||
orig_pixels[3][3] = 255u;
|
||||
// Test white
|
||||
orig_pixels[4][0] = 255u;
|
||||
orig_pixels[4][1] = 255u;
|
||||
orig_pixels[4][2] = 255u;
|
||||
orig_pixels[4][3] = 255u;
|
||||
// Test color
|
||||
orig_pixels[3][0] = 16u;
|
||||
orig_pixels[3][1] = 64u;
|
||||
orig_pixels[3][2] = 192u;
|
||||
orig_pixels[3][3] = 224u;
|
||||
orig_pixels[5][0] = 16u;
|
||||
orig_pixels[5][1] = 64u;
|
||||
orig_pixels[5][2] = 192u;
|
||||
orig_pixels[5][3] = 224u;
|
||||
// Do 16 to test asm version.
|
||||
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
|
||||
EXPECT_EQ(27u, orig_pixels[0][0]);
|
||||
EXPECT_EQ(27u, orig_pixels[0][1]);
|
||||
EXPECT_EQ(27u, orig_pixels[0][2]);
|
||||
EXPECT_EQ(30u, orig_pixels[0][0]);
|
||||
EXPECT_EQ(30u, orig_pixels[0][1]);
|
||||
EXPECT_EQ(30u, orig_pixels[0][2]);
|
||||
EXPECT_EQ(128u, orig_pixels[0][3]);
|
||||
EXPECT_EQ(151u, orig_pixels[1][0]);
|
||||
EXPECT_EQ(151u, orig_pixels[1][1]);
|
||||
EXPECT_EQ(151u, orig_pixels[1][2]);
|
||||
EXPECT_EQ(149u, orig_pixels[1][0]);
|
||||
EXPECT_EQ(149u, orig_pixels[1][1]);
|
||||
EXPECT_EQ(149u, orig_pixels[1][2]);
|
||||
EXPECT_EQ(0u, orig_pixels[1][3]);
|
||||
EXPECT_EQ(75u, orig_pixels[2][0]);
|
||||
EXPECT_EQ(75u, orig_pixels[2][1]);
|
||||
EXPECT_EQ(75u, orig_pixels[2][2]);
|
||||
EXPECT_EQ(76u, orig_pixels[2][0]);
|
||||
EXPECT_EQ(76u, orig_pixels[2][1]);
|
||||
EXPECT_EQ(76u, orig_pixels[2][2]);
|
||||
EXPECT_EQ(255u, orig_pixels[2][3]);
|
||||
EXPECT_EQ(96u, orig_pixels[3][0]);
|
||||
EXPECT_EQ(96u, orig_pixels[3][1]);
|
||||
EXPECT_EQ(96u, orig_pixels[3][2]);
|
||||
EXPECT_EQ(224u, orig_pixels[3][3]);
|
||||
|
||||
EXPECT_EQ(0u, orig_pixels[3][0]);
|
||||
EXPECT_EQ(0u, orig_pixels[3][1]);
|
||||
EXPECT_EQ(0u, orig_pixels[3][2]);
|
||||
EXPECT_EQ(255u, orig_pixels[3][3]);
|
||||
EXPECT_EQ(255u, orig_pixels[4][0]);
|
||||
EXPECT_EQ(255u, orig_pixels[4][1]);
|
||||
EXPECT_EQ(255u, orig_pixels[4][2]);
|
||||
EXPECT_EQ(255u, orig_pixels[4][3]);
|
||||
EXPECT_EQ(96u, orig_pixels[5][0]);
|
||||
EXPECT_EQ(96u, orig_pixels[5][1]);
|
||||
EXPECT_EQ(96u, orig_pixels[5][2]);
|
||||
EXPECT_EQ(224u, orig_pixels[5][3]);
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
orig_pixels[i][0] = i;
|
||||
orig_pixels[i][1] = i / 2;
|
||||
@ -323,7 +339,6 @@ TEST_F(libyuvTest, TestARGBGray) {
|
||||
TEST_F(libyuvTest, TestARGBGrayTo) {
|
||||
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
|
||||
SIMD_ALIGNED(uint8 gray_pixels[256][4]);
|
||||
|
||||
// Test blue
|
||||
orig_pixels[0][0] = 255u;
|
||||
orig_pixels[0][1] = 0u;
|
||||
@ -339,30 +354,47 @@ TEST_F(libyuvTest, TestARGBGrayTo) {
|
||||
orig_pixels[2][1] = 0u;
|
||||
orig_pixels[2][2] = 255u;
|
||||
orig_pixels[2][3] = 255u;
|
||||
// Test black
|
||||
orig_pixels[3][0] = 0u;
|
||||
orig_pixels[3][1] = 0u;
|
||||
orig_pixels[3][2] = 0u;
|
||||
orig_pixels[3][3] = 255u;
|
||||
// Test white
|
||||
orig_pixels[4][0] = 255u;
|
||||
orig_pixels[4][1] = 255u;
|
||||
orig_pixels[4][2] = 255u;
|
||||
orig_pixels[4][3] = 255u;
|
||||
// Test color
|
||||
orig_pixels[3][0] = 16u;
|
||||
orig_pixels[3][1] = 64u;
|
||||
orig_pixels[3][2] = 192u;
|
||||
orig_pixels[3][3] = 224u;
|
||||
orig_pixels[5][0] = 16u;
|
||||
orig_pixels[5][1] = 64u;
|
||||
orig_pixels[5][2] = 192u;
|
||||
orig_pixels[5][3] = 224u;
|
||||
// Do 16 to test asm version.
|
||||
ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
|
||||
EXPECT_EQ(27u, gray_pixels[0][0]);
|
||||
EXPECT_EQ(27u, gray_pixels[0][1]);
|
||||
EXPECT_EQ(27u, gray_pixels[0][2]);
|
||||
EXPECT_EQ(30u, gray_pixels[0][0]);
|
||||
EXPECT_EQ(30u, gray_pixels[0][1]);
|
||||
EXPECT_EQ(30u, gray_pixels[0][2]);
|
||||
EXPECT_EQ(128u, gray_pixels[0][3]);
|
||||
EXPECT_EQ(151u, gray_pixels[1][0]);
|
||||
EXPECT_EQ(151u, gray_pixels[1][1]);
|
||||
EXPECT_EQ(151u, gray_pixels[1][2]);
|
||||
EXPECT_EQ(149u, gray_pixels[1][0]);
|
||||
EXPECT_EQ(149u, gray_pixels[1][1]);
|
||||
EXPECT_EQ(149u, gray_pixels[1][2]);
|
||||
EXPECT_EQ(0u, gray_pixels[1][3]);
|
||||
EXPECT_EQ(75u, gray_pixels[2][0]);
|
||||
EXPECT_EQ(75u, gray_pixels[2][1]);
|
||||
EXPECT_EQ(75u, gray_pixels[2][2]);
|
||||
EXPECT_EQ(76u, gray_pixels[2][0]);
|
||||
EXPECT_EQ(76u, gray_pixels[2][1]);
|
||||
EXPECT_EQ(76u, gray_pixels[2][2]);
|
||||
EXPECT_EQ(255u, gray_pixels[2][3]);
|
||||
EXPECT_EQ(96u, gray_pixels[3][0]);
|
||||
EXPECT_EQ(96u, gray_pixels[3][1]);
|
||||
EXPECT_EQ(96u, gray_pixels[3][2]);
|
||||
EXPECT_EQ(224u, gray_pixels[3][3]);
|
||||
|
||||
EXPECT_EQ(0u, gray_pixels[3][0]);
|
||||
EXPECT_EQ(0u, gray_pixels[3][1]);
|
||||
EXPECT_EQ(0u, gray_pixels[3][2]);
|
||||
EXPECT_EQ(255u, gray_pixels[3][3]);
|
||||
EXPECT_EQ(255u, gray_pixels[4][0]);
|
||||
EXPECT_EQ(255u, gray_pixels[4][1]);
|
||||
EXPECT_EQ(255u, gray_pixels[4][2]);
|
||||
EXPECT_EQ(255u, gray_pixels[4][3]);
|
||||
EXPECT_EQ(96u, gray_pixels[5][0]);
|
||||
EXPECT_EQ(96u, gray_pixels[5][1]);
|
||||
EXPECT_EQ(96u, gray_pixels[5][2]);
|
||||
EXPECT_EQ(224u, gray_pixels[5][3]);
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
orig_pixels[i][0] = i;
|
||||
orig_pixels[i][1] = i / 2;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user