mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Change Sobel to use the JPEG luma calculation instead of extracting the G channel. Using luma produces a better Sobel result that respects all 3 channels of RGB. Historically the G channel was used to improve performance, and because the luma of I420 has a constrained range, which hurts quality. With the JPEG variation of YUV, the luma is more accurate (including across platforms), better optimized for AVX2 and odd widths, and full range.
BUG=444 TESTED=ARGBSobelXY_Opt R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/57479004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1414 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
535a7140f2
commit
cfce47efc8
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1415
|
Version: 1416
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -143,6 +143,12 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
|||||||
uint8* dst_y, int dst_stride_y,
|
uint8* dst_y, int dst_stride_y,
|
||||||
int width, int height);
|
int width, int height);
|
||||||
|
|
||||||
|
// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB)
|
||||||
|
LIBYUV_API
|
||||||
|
int ARGBToG(const uint8* src_argb, int src_stride_argb,
|
||||||
|
uint8* dst_g, int dst_stride_g,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
// Convert ARGB To NV12.
|
// Convert ARGB To NV12.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||||
|
|||||||
@ -70,7 +70,6 @@ extern "C" {
|
|||||||
#define HAS_ARGBSHUFFLEROW_SSSE3
|
#define HAS_ARGBSHUFFLEROW_SSSE3
|
||||||
#define HAS_ARGBTOARGB1555ROW_SSE2
|
#define HAS_ARGBTOARGB1555ROW_SSE2
|
||||||
#define HAS_ARGBTOARGB4444ROW_SSE2
|
#define HAS_ARGBTOARGB4444ROW_SSE2
|
||||||
#define HAS_ARGBTOBAYERGGROW_SSE2
|
|
||||||
#define HAS_ARGBTORAWROW_SSSE3
|
#define HAS_ARGBTORAWROW_SSSE3
|
||||||
#define HAS_ARGBTORGB24ROW_SSSE3
|
#define HAS_ARGBTORGB24ROW_SSSE3
|
||||||
#define HAS_ARGBTORGB565ROW_SSE2
|
#define HAS_ARGBTORGB565ROW_SSE2
|
||||||
@ -271,7 +270,6 @@ extern "C" {
|
|||||||
#define HAS_ARGB4444TOYROW_NEON
|
#define HAS_ARGB4444TOYROW_NEON
|
||||||
#define HAS_ARGBTOARGB1555ROW_NEON
|
#define HAS_ARGBTOARGB1555ROW_NEON
|
||||||
#define HAS_ARGBTOARGB4444ROW_NEON
|
#define HAS_ARGBTOARGB4444ROW_NEON
|
||||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
|
||||||
#define HAS_ARGBTORAWROW_NEON
|
#define HAS_ARGBTORAWROW_NEON
|
||||||
#define HAS_ARGBTORGB24ROW_NEON
|
#define HAS_ARGBTORGB24ROW_NEON
|
||||||
#define HAS_ARGBTORGB565ROW_NEON
|
#define HAS_ARGBTORGB565ROW_NEON
|
||||||
@ -1632,17 +1630,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
|
void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
|
||||||
uint8* dst_u, uint8* dst_v, int pix);
|
uint8* dst_u, uint8* dst_v, int pix);
|
||||||
|
|
||||||
void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /* selector */, int pix);
|
|
||||||
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /* selector */, int pix);
|
|
||||||
void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /* selector */, int pix);
|
|
||||||
void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /* selector */, int pix);
|
|
||||||
void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /* selector */, int pix);
|
|
||||||
|
|
||||||
void I422ToYUY2Row_C(const uint8* src_y,
|
void I422ToYUY2Row_C(const uint8* src_y,
|
||||||
const uint8* src_u,
|
const uint8* src_u,
|
||||||
const uint8* src_v,
|
const uint8* src_v,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1415
|
#define LIBYUV_VERSION 1416
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -1976,8 +1976,8 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
|
|||||||
const uint8* src_sobely,
|
const uint8* src_sobely,
|
||||||
uint8* dst, int width)) {
|
uint8* dst, int width)) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
|
||||||
uint32 selector, int pix) = ARGBToBayerGGRow_C;
|
ARGBToYJRow_C;
|
||||||
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
||||||
uint8* dst_sobely, int width) = SobelYRow_C;
|
uint8* dst_sobely, int width) = SobelYRow_C;
|
||||||
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
|
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
|
||||||
@ -1993,31 +1993,32 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
|
|||||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||||
src_stride_argb = -src_stride_argb;
|
src_stride_argb = -src_stride_argb;
|
||||||
}
|
}
|
||||||
// ARGBToBayer used to select G channel from ARGB.
|
|
||||||
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
|
#if defined(HAS_ARGBTOYJROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
|
||||||
ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
|
|
||||||
if (IS_ALIGNED(width, 8)) {
|
|
||||||
ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
ARGBToYJRow = ARGBToYJRow_SSSE3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
|
#if defined(HAS_ARGBTOYJROW_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
ARGBToYJRow = ARGBToYJRow_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_ARGBTOYJROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
|
ARGBToYJRow = ARGBToYJRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
ARGBToBayerRow = ARGBToBayerGGRow_NEON;
|
ARGBToYJRow = ARGBToYJRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_SOBELYROW_SSE2)
|
#if defined(HAS_SOBELYROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
SobelYRow = SobelYRow_SSE2;
|
SobelYRow = SobelYRow_SSE2;
|
||||||
@ -2050,20 +2051,20 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
|
|||||||
uint8* row_y0 = row_y + kEdge;
|
uint8* row_y0 = row_y + kEdge;
|
||||||
uint8* row_y1 = row_y0 + kRowSize;
|
uint8* row_y1 = row_y0 + kRowSize;
|
||||||
uint8* row_y2 = row_y1 + kRowSize;
|
uint8* row_y2 = row_y1 + kRowSize;
|
||||||
ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
|
ARGBToYJRow(src_argb, row_y0, width);
|
||||||
row_y0[-1] = row_y0[0];
|
row_y0[-1] = row_y0[0];
|
||||||
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
|
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
|
||||||
ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
|
ARGBToYJRow(src_argb, row_y1, width);
|
||||||
row_y1[-1] = row_y1[0];
|
row_y1[-1] = row_y1[0];
|
||||||
memset(row_y1 + width, row_y1[width - 1], 16);
|
memset(row_y1 + width, row_y1[width - 1], 16);
|
||||||
memset(row_y2 + width, 0, 16);
|
memset(row_y2 + width, 0, 16);
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
// Convert next row of ARGB to Y.
|
// Convert next row of ARGB to Y (JPEG luma via ARGBToYJRow).
|
||||||
if (y < (height - 1)) {
|
if (y < (height - 1)) {
|
||||||
src_argb += src_stride_argb;
|
src_argb += src_stride_argb;
|
||||||
}
|
}
|
||||||
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
|
ARGBToYJRow(src_argb, row_y2, width);
|
||||||
row_y2[-1] = row_y2[0];
|
row_y2[-1] = row_y2[0];
|
||||||
row_y2[width] = row_y2[width - 1];
|
row_y2[width] = row_y2[width - 1];
|
||||||
|
|
||||||
|
|||||||
@ -278,27 +278,6 @@ RGBDANY(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
|
|||||||
#endif
|
#endif
|
||||||
#undef RGBDANY
|
#undef RGBDANY
|
||||||
|
|
||||||
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
|
|
||||||
#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \
|
|
||||||
void NAMEANY(const uint8* src, uint8* dst, uint32 selector, int width) { \
|
|
||||||
int n = width & ~MASK; \
|
|
||||||
if (n > 0) { \
|
|
||||||
ARGBTORGB_SIMD(src, dst, selector, n); \
|
|
||||||
} \
|
|
||||||
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
|
|
||||||
BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
|
|
||||||
4, 1, 7)
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
|
|
||||||
BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
|
|
||||||
4, 1, 7)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef BAYERANY
|
|
||||||
|
|
||||||
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
|
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \
|
||||||
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
|
||||||
int n = width & ~MASK; \
|
int n = width & ~MASK; \
|
||||||
|
|||||||
@ -1011,17 +1011,17 @@ void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
|
|||||||
#define VR -102 /* round(-1.596 * 64) */
|
#define VR -102 /* round(-1.596 * 64) */
|
||||||
|
|
||||||
// Bias values to subtract 16 from Y and 128 from U and V.
|
// Bias values to subtract 16 from Y and 128 from U and V.
|
||||||
#define BB (UB * 128 + YGB)
|
#define BB (UB * 128 + YGB)
|
||||||
#define BG (UG * 128 + VG * 128 + YGB)
|
#define BG (UG * 128 + VG * 128 + YGB)
|
||||||
#define BR (VR * 128 + YGB)
|
#define BR (VR * 128 + YGB)
|
||||||
|
|
||||||
// C reference code that mimics the YUV assembly.
|
// C reference code that mimics the YUV assembly.
|
||||||
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
|
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
|
||||||
uint8* b, uint8* g, uint8* r) {
|
uint8* b, uint8* g, uint8* r) {
|
||||||
uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
|
uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
|
||||||
*b = Clamp((int32)(-( u * UB) + y1 + BB) >> 6);
|
*b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6);
|
||||||
*g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6);
|
*g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6);
|
||||||
*r = Clamp((int32)(-(v * VR ) + y1 + BR) >> 6);
|
*r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
// C reference code that mimics the YUV assembly.
|
// C reference code that mimics the YUV assembly.
|
||||||
@ -1059,17 +1059,17 @@ static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
|
|||||||
#define VRJ -90 /* round(-1.40200 * 64) */
|
#define VRJ -90 /* round(-1.40200 * 64) */
|
||||||
|
|
||||||
// Bias values to subtract 16 from Y and 128 from U and V.
|
// Bias values to subtract 16 from Y and 128 from U and V.
|
||||||
#define BBJ (UBJ * 128 + YGBJ)
|
#define BBJ (UBJ * 128 + YGBJ)
|
||||||
#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
|
#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
|
||||||
#define BRJ (VRJ * 128 + YGBJ)
|
#define BRJ (VRJ * 128 + YGBJ)
|
||||||
|
|
||||||
// C reference code that mimics the YUV assembly.
|
// C reference code that mimics the YUV assembly.
|
||||||
static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
|
static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
|
||||||
uint8* b, uint8* g, uint8* r) {
|
uint8* b, uint8* g, uint8* r) {
|
||||||
uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16;
|
uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16;
|
||||||
*b = Clamp((int32)(-( u * UBJ) + y1 + BBJ) >> 6);
|
*b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6);
|
||||||
*g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6);
|
*g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6);
|
||||||
*r = Clamp((int32)(-(v * VRJ ) + y1 + BRJ) >> 6);
|
*r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef YGJ
|
#undef YGJ
|
||||||
@ -2086,22 +2086,6 @@ void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select G channel from ARGB. e.g. GGGGGGGG
|
|
||||||
void ARGBToBayerGGRow_C(const uint8* src_argb,
|
|
||||||
uint8* dst_bayer, uint32 selector, int pix) {
|
|
||||||
// Copy a row of G.
|
|
||||||
int x;
|
|
||||||
for (x = 0; x < pix - 1; x += 2) {
|
|
||||||
dst_bayer[0] = src_argb[1];
|
|
||||||
dst_bayer[1] = src_argb[5];
|
|
||||||
src_argb += 8;
|
|
||||||
dst_bayer += 2;
|
|
||||||
}
|
|
||||||
if (pix & 1) {
|
|
||||||
dst_bayer[0] = src_argb[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use first 4 shuffler values to reorder ARGB channels.
|
// Use first 4 shuffler values to reorder ARGB channels.
|
||||||
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
|
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
|
||||||
const uint8* shuffler, int pix) {
|
const uint8* shuffler, int pix) {
|
||||||
|
|||||||
@ -1251,25 +1251,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select G channels from ARGB. e.g. GGGGGGGG
|
|
||||||
void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /*selector*/, int pix) {
|
|
||||||
asm volatile (
|
|
||||||
"1: \n"
|
|
||||||
MEMACCESS(0)
|
|
||||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels.
|
|
||||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
|
||||||
MEMACCESS(1)
|
|
||||||
"vst1.8 {d1}, [%1]! \n" // store 8 G's.
|
|
||||||
"bgt 1b \n"
|
|
||||||
: "+r"(src_argb), // %0
|
|
||||||
"+r"(dst_bayer), // %1
|
|
||||||
"+r"(pix) // %2
|
|
||||||
:
|
|
||||||
: "cc", "memory", "q0", "q1" // Clobber List
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||||
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||||
const uint8* shuffler, int pix) {
|
const uint8* shuffler, int pix) {
|
||||||
|
|||||||
@ -1259,27 +1259,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
|||||||
}
|
}
|
||||||
#endif // HAS_UYVYTOUVROW_NEON
|
#endif // HAS_UYVYTOUVROW_NEON
|
||||||
|
|
||||||
// Select G channels from ARGB. e.g. GGGGGGGG
|
|
||||||
#ifdef HAS_ARGBTOBAYERGGROW_NEON
|
|
||||||
void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 /*selector*/, int pix) {
|
|
||||||
asm volatile (
|
|
||||||
"1: \n"
|
|
||||||
MEMACCESS(0)
|
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels
|
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
|
||||||
MEMACCESS(1)
|
|
||||||
"st1 {v1.8b}, [%1], #8 \n" // store 8 G's.
|
|
||||||
"b.gt 1b \n"
|
|
||||||
: "+r"(src_argb), // %0
|
|
||||||
"+r"(dst_bayer), // %1
|
|
||||||
"+r"(pix) // %2
|
|
||||||
:
|
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
|
||||||
);
|
|
||||||
}
|
|
||||||
#endif // HAS_ARGBTOBAYERGGROW_NEON
|
|
||||||
|
|
||||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||||
#ifdef HAS_ARGBSHUFFLEROW_NEON
|
#ifdef HAS_ARGBSHUFFLEROW_NEON
|
||||||
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
|||||||
@ -1599,8 +1599,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
|||||||
"punpcklwd %%xmm2,%%xmm0 \n" \
|
"punpcklwd %%xmm2,%%xmm0 \n" \
|
||||||
"punpckhwd %%xmm2,%%xmm1 \n" \
|
"punpckhwd %%xmm2,%%xmm1 \n" \
|
||||||
"movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
|
"movdqu %%xmm0," MEMACCESS([dst_argb]) " \n" \
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n" \
|
"movdqu %%xmm1," MEMACCESS2(0x10, [dst_argb]) " \n" \
|
||||||
"lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n"
|
"lea " MEMLEA(0x20, [dst_argb]) ", %[dst_argb] \n"
|
||||||
|
|
||||||
// Store 8 BGRA values. Assumes XMM5 is zero.
|
// Store 8 BGRA values. Assumes XMM5 is zero.
|
||||||
#define STOREBGRA \
|
#define STOREBGRA \
|
||||||
@ -1611,8 +1611,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
|||||||
"punpcklwd %%xmm1,%%xmm5 \n" \
|
"punpcklwd %%xmm1,%%xmm5 \n" \
|
||||||
"punpckhwd %%xmm1,%%xmm0 \n" \
|
"punpckhwd %%xmm1,%%xmm0 \n" \
|
||||||
"movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \
|
"movdqu %%xmm5," MEMACCESS([dst_bgra]) " \n" \
|
||||||
"movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) " \n" \
|
"movdqu %%xmm0," MEMACCESS2(0x10, [dst_bgra]) " \n" \
|
||||||
"lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n"
|
"lea " MEMLEA(0x20, [dst_bgra]) ", %[dst_bgra] \n"
|
||||||
|
|
||||||
// Store 8 ABGR values. Assumes XMM5 is zero.
|
// Store 8 ABGR values. Assumes XMM5 is zero.
|
||||||
#define STOREABGR \
|
#define STOREABGR \
|
||||||
@ -1622,8 +1622,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
|||||||
"punpcklwd %%xmm0,%%xmm2 \n" \
|
"punpcklwd %%xmm0,%%xmm2 \n" \
|
||||||
"punpckhwd %%xmm0,%%xmm1 \n" \
|
"punpckhwd %%xmm0,%%xmm1 \n" \
|
||||||
"movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \
|
"movdqu %%xmm2," MEMACCESS([dst_abgr]) " \n" \
|
||||||
"movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) " \n" \
|
"movdqu %%xmm1," MEMACCESS2(0x10, [dst_abgr]) " \n" \
|
||||||
"lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n"
|
"lea " MEMLEA(0x20, [dst_abgr]) ", %[dst_abgr] \n"
|
||||||
|
|
||||||
// Store 8 RGBA values. Assumes XMM5 is zero.
|
// Store 8 RGBA values. Assumes XMM5 is zero.
|
||||||
#define STORERGBA \
|
#define STORERGBA \
|
||||||
@ -1634,8 +1634,8 @@ YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
|
|||||||
"punpcklwd %%xmm1,%%xmm5 \n" \
|
"punpcklwd %%xmm1,%%xmm5 \n" \
|
||||||
"punpckhwd %%xmm1,%%xmm0 \n" \
|
"punpckhwd %%xmm1,%%xmm0 \n" \
|
||||||
"movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
|
"movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
|
||||||
"movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) " \n" \
|
"movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
|
||||||
"lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n"
|
"lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
|
||||||
|
|
||||||
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
@ -5030,37 +5030,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif // HAS_INTERPOLATEROW_SSE2
|
#endif // HAS_INTERPOLATEROW_SSE2
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOBAYERGGROW_SSE2
|
|
||||||
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 selector, int pix) {
|
|
||||||
asm volatile (
|
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
|
||||||
"psrld $0x18,%%xmm5 \n"
|
|
||||||
LABELALIGN
|
|
||||||
"1: \n"
|
|
||||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
|
||||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
|
||||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
|
||||||
"psrld $0x8,%%xmm0 \n"
|
|
||||||
"psrld $0x8,%%xmm1 \n"
|
|
||||||
"pand %%xmm5,%%xmm0 \n"
|
|
||||||
"pand %%xmm5,%%xmm1 \n"
|
|
||||||
"packssdw %%xmm1,%%xmm0 \n"
|
|
||||||
"packuswb %%xmm1,%%xmm0 \n"
|
|
||||||
"movq %%xmm0," MEMACCESS(1) " \n"
|
|
||||||
"lea " MEMLEA(0x8,1) ",%1 \n"
|
|
||||||
"sub $0x8,%2 \n"
|
|
||||||
"jg 1b \n"
|
|
||||||
: "+r"(src_argb), // %0
|
|
||||||
"+r"(dst_bayer), // %1
|
|
||||||
"+r"(pix) // %2
|
|
||||||
:
|
|
||||||
: "memory", "cc"
|
|
||||||
, "xmm0", "xmm1", "xmm5"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
#endif // HAS_ARGBTOBAYERGGROW_SSE2
|
|
||||||
|
|
||||||
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
|
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
|
||||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||||
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
|||||||
@ -5875,36 +5875,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
|||||||
}
|
}
|
||||||
#endif // HAS_INTERPOLATEROW_SSE2
|
#endif // HAS_INTERPOLATEROW_SSE2
|
||||||
|
|
||||||
// Specialized ARGB to Bayer that just isolates G channel.
|
|
||||||
__declspec(naked)
|
|
||||||
void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
|
|
||||||
uint32 selector, int pix) {
|
|
||||||
__asm {
|
|
||||||
mov eax, [esp + 4] // src_argb
|
|
||||||
mov edx, [esp + 8] // dst_bayer
|
|
||||||
// selector
|
|
||||||
mov ecx, [esp + 16] // pix
|
|
||||||
pcmpeqb xmm5, xmm5 // generate mask 0x000000ff
|
|
||||||
psrld xmm5, 24
|
|
||||||
|
|
||||||
wloop:
|
|
||||||
movdqu xmm0, [eax]
|
|
||||||
movdqu xmm1, [eax + 16]
|
|
||||||
lea eax, [eax + 32]
|
|
||||||
psrld xmm0, 8 // Move green to bottom.
|
|
||||||
psrld xmm1, 8
|
|
||||||
pand xmm0, xmm5
|
|
||||||
pand xmm1, xmm5
|
|
||||||
packssdw xmm0, xmm1
|
|
||||||
packuswb xmm0, xmm1
|
|
||||||
movq qword ptr [edx], xmm0
|
|
||||||
lea edx, [edx + 8]
|
|
||||||
sub ecx, 8
|
|
||||||
jg wloop
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||||
__declspec(naked)
|
__declspec(naked)
|
||||||
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user