mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Sobel: use the G channel for consistent results on all CPUs, better performance, and the full output range of 0 to 255.
BUG=201 TESTED=out\release\libyuv_unittest --gtest_filter=*Sobel* Review URL: https://webrtc-codereview.appspot.com/1225004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@614 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
30a96ede5b
commit
e8df16bd7c
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 613
|
Version: 614
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -19,6 +19,11 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TODO(fbarchard): Remove kMaxStride.
|
// TODO(fbarchard): Remove kMaxStride.
|
||||||
|
// Functions should allocate a single row buffer of this size on the stack.
|
||||||
|
// Functions that allocate more than one row buffer may fail or cause stack
|
||||||
|
// probe.
|
||||||
|
// This size holds one row of retina Mac pixels in 32 bit ARGB.
|
||||||
|
// Functions may want less for 8 or 16 bit row buffers.
|
||||||
#define kMaxStride (2880 * 4)
|
#define kMaxStride (2880 * 4)
|
||||||
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
|
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 613
|
#define LIBYUV_VERSION 614
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -72,10 +72,10 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
|
|||||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 &&
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
|
||||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||||
if (IS_ALIGNED(width, 4)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -430,9 +430,9 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
|
|||||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||||
if (IS_ALIGNED(width, 4)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1823,13 +1823,12 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Sobel ARGB effect.
|
// Sobel ARGB effect.
|
||||||
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
||||||
uint8* dst_argb, int dst_stride_argb,
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
int width, int height) {
|
int width, int height) {
|
||||||
if (!src_argb || !dst_argb ||
|
if (!src_argb || !dst_argb ||
|
||||||
width <= 0 || height == 0 || width > kMaxStride) {
|
width <= 0 || height == 0 || width > (kMaxStride / 4)) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Negative height means invert the image.
|
// Negative height means invert the image.
|
||||||
@ -1838,39 +1837,25 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
|||||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||||
src_stride_argb = -src_stride_argb;
|
src_stride_argb = -src_stride_argb;
|
||||||
}
|
}
|
||||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
// ARGBToBayer used to select G channel from ARGB.
|
||||||
ARGBToYRow_C;
|
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
|
||||||
if (IS_ALIGNED(width, 16)) {
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||||
// Assumed row buffer aligned.
|
|
||||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
|
||||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
|
|
||||||
bool clear = false;
|
|
||||||
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
|
|
||||||
clear = true;
|
|
||||||
ARGBToYRow = ARGBToYRow_Any_AVX2;
|
|
||||||
if (IS_ALIGNED(width, 32)) {
|
|
||||||
ARGBToYRow = ARGBToYRow_AVX2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOYROW_NEON)
|
|
||||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
|
||||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
ARGBToYRow = ARGBToYRow_NEON;
|
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
|
||||||
|
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||||
|
if (IS_ALIGNED(width, 4)) {
|
||||||
|
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
|
||||||
uint8* dst_sobely, int width) = SobelYRow_C;
|
uint8* dst_sobely, int width) = SobelYRow_C;
|
||||||
#if defined(HAS_SOBELYROW_SSSE3)
|
#if defined(HAS_SOBELYROW_SSSE3)
|
||||||
@ -1896,18 +1881,18 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
|
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
|
||||||
SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
|
SIMD_ALIGNED(uint8 row_y[(kMaxStride / 4 + kEdge) * 3 + kEdge]);
|
||||||
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
|
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride / 4]);
|
||||||
SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
|
SIMD_ALIGNED(uint8 row_sobely[kMaxStride / 4]);
|
||||||
|
|
||||||
// Convert first row.
|
// Convert first row.
|
||||||
uint8* row_y0 = row_y + kEdge;
|
uint8* row_y0 = row_y + kEdge;
|
||||||
uint8* row_y1 = row_y0 + kMaxStride;
|
uint8* row_y1 = row_y0 + kMaxStride / 4;
|
||||||
uint8* row_y2 = row_y1 + kMaxStride;
|
uint8* row_y2 = row_y1 + kMaxStride / 4;
|
||||||
ARGBToYRow(src_argb, row_y0, width);
|
ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
|
||||||
row_y0[-1] = row_y0[0];
|
row_y0[-1] = row_y0[0];
|
||||||
row_y0[width] = row_y0[width - 1];
|
row_y0[width] = row_y0[width - 1];
|
||||||
ARGBToYRow(src_argb, row_y1, width);
|
ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
|
||||||
row_y1[-1] = row_y1[0];
|
row_y1[-1] = row_y1[0];
|
||||||
row_y1[width] = row_y1[width - 1];
|
row_y1[width] = row_y1[width - 1];
|
||||||
|
|
||||||
@ -1916,7 +1901,7 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
|||||||
if (y < (height - 1)) {
|
if (y < (height - 1)) {
|
||||||
src_argb += src_stride_argb;
|
src_argb += src_stride_argb;
|
||||||
}
|
}
|
||||||
ARGBToYRow(src_argb, row_y2, width);
|
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
|
||||||
row_y2[-1] = row_y2[0];
|
row_y2[-1] = row_y2[0];
|
||||||
row_y2[width] = row_y2[width - 1];
|
row_y2[width] = row_y2[width - 1];
|
||||||
|
|
||||||
@ -1932,23 +1917,17 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
|||||||
|
|
||||||
dst_argb += dst_stride_argb;
|
dst_argb += dst_stride_argb;
|
||||||
}
|
}
|
||||||
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
|
|
||||||
if (clear) {
|
|
||||||
__asm vzeroupper;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// SobelXY ARGB effect.
|
// SobelXY ARGB effect.
|
||||||
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
|
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
|
||||||
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
||||||
uint8* dst_argb, int dst_stride_argb,
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
int width, int height) {
|
int width, int height) {
|
||||||
if (!src_argb || !dst_argb ||
|
if (!src_argb || !dst_argb ||
|
||||||
width <= 0 || height == 0 || width > kMaxStride) {
|
width <= 0 || height == 0 || width > kMaxStride / 4) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Negative height means invert the image.
|
// Negative height means invert the image.
|
||||||
@ -1957,35 +1936,22 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
|||||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||||
src_stride_argb = -src_stride_argb;
|
src_stride_argb = -src_stride_argb;
|
||||||
}
|
}
|
||||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
// ARGBToBayer used to select G channel from ARGB.
|
||||||
ARGBToYRow_C;
|
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
|
||||||
if (IS_ALIGNED(width, 16)) {
|
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||||
// Assumed row buffer aligned.
|
|
||||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
|
||||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
|
|
||||||
bool clear = false;
|
|
||||||
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
|
|
||||||
clear = true;
|
|
||||||
ARGBToYRow = ARGBToYRow_Any_AVX2;
|
|
||||||
if (IS_ALIGNED(width, 32)) {
|
|
||||||
ARGBToYRow = ARGBToYRow_AVX2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#if defined(HAS_ARGBTOYROW_NEON)
|
|
||||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
|
||||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
|
||||||
if (IS_ALIGNED(width, 8)) {
|
if (IS_ALIGNED(width, 8)) {
|
||||||
ARGBToYRow = ARGBToYRow_NEON;
|
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
|
||||||
|
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||||
|
if (IS_ALIGNED(width, 4)) {
|
||||||
|
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -2015,18 +1981,18 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
|
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
|
||||||
SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
|
SIMD_ALIGNED(uint8 row_y[(kMaxStride / 4 + kEdge) * 3 + kEdge]);
|
||||||
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
|
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride / 4]);
|
||||||
SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
|
SIMD_ALIGNED(uint8 row_sobely[kMaxStride / 4]);
|
||||||
|
|
||||||
// Convert first row.
|
// Convert first row.
|
||||||
uint8* row_y0 = row_y + kEdge;
|
uint8* row_y0 = row_y + kEdge;
|
||||||
uint8* row_y1 = row_y0 + kMaxStride;
|
uint8* row_y1 = row_y0 + kMaxStride / 4;
|
||||||
uint8* row_y2 = row_y1 + kMaxStride;
|
uint8* row_y2 = row_y1 + kMaxStride / 4;
|
||||||
ARGBToYRow(src_argb, row_y0, width);
|
ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
|
||||||
row_y0[-1] = row_y0[0];
|
row_y0[-1] = row_y0[0];
|
||||||
row_y0[width] = row_y0[width - 1];
|
row_y0[width] = row_y0[width - 1];
|
||||||
ARGBToYRow(src_argb, row_y1, width);
|
ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
|
||||||
row_y1[-1] = row_y1[0];
|
row_y1[-1] = row_y1[0];
|
||||||
row_y1[width] = row_y1[width - 1];
|
row_y1[width] = row_y1[width - 1];
|
||||||
|
|
||||||
@ -2035,7 +2001,7 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
|||||||
if (y < (height - 1)) {
|
if (y < (height - 1)) {
|
||||||
src_argb += src_stride_argb;
|
src_argb += src_stride_argb;
|
||||||
}
|
}
|
||||||
ARGBToYRow(src_argb, row_y2, width);
|
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
|
||||||
row_y2[-1] = row_y2[0];
|
row_y2[-1] = row_y2[0];
|
||||||
row_y2[width] = row_y2[width - 1];
|
row_y2[width] = row_y2[width - 1];
|
||||||
|
|
||||||
@ -2051,11 +2017,6 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
|||||||
|
|
||||||
dst_argb += dst_stride_argb;
|
dst_argb += dst_stride_argb;
|
||||||
}
|
}
|
||||||
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
|
|
||||||
if (clear) {
|
|
||||||
__asm vzeroupper;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -186,7 +186,7 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
|
|||||||
|
|
||||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||||
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
|
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
|
||||||
3, 4, 1)
|
7, 4, 1)
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOBAYERROW_NEON)
|
#if defined(HAS_ARGBTOBAYERROW_NEON)
|
||||||
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
|
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
|
||||||
|
|||||||
@ -4595,11 +4595,14 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
|||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa (%0),%%xmm0 \n"
|
||||||
"lea 0x10(%0),%0 \n"
|
"movdqa 0x10(%0),%%xmm1 \n"
|
||||||
|
"lea 0x20(%0),%0 \n"
|
||||||
"pshufb %%xmm5,%%xmm0 \n"
|
"pshufb %%xmm5,%%xmm0 \n"
|
||||||
"sub $0x4,%2 \n"
|
"pshufb %%xmm5,%%xmm1 \n"
|
||||||
"movd %%xmm0,(%1) \n"
|
"punpckldq xmm1, xmm0 \n"
|
||||||
"lea 0x4(%1),%1 \n"
|
"sub $0x8,%2 \n"
|
||||||
|
"movq %%xmm0,(%1) \n"
|
||||||
|
"lea 0x8(%1),%1 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_bayer), // %1
|
"+r"(dst_bayer), // %1
|
||||||
@ -4607,7 +4610,7 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
|||||||
: "g"(selector) // %3
|
: "g"(selector) // %3
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm5"
|
, "xmm0", "xmm1", "xmm5"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5795,11 +5795,14 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
|||||||
align 16
|
align 16
|
||||||
wloop:
|
wloop:
|
||||||
movdqa xmm0, [eax]
|
movdqa xmm0, [eax]
|
||||||
lea eax, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
|
lea eax, [eax + 32]
|
||||||
pshufb xmm0, xmm5
|
pshufb xmm0, xmm5
|
||||||
sub ecx, 4
|
pshufb xmm1, xmm5
|
||||||
movd [edx], xmm0
|
punpckldq xmm0, xmm1
|
||||||
lea edx, [edx + 4]
|
sub ecx, 8
|
||||||
|
movq qword ptr [edx], xmm0
|
||||||
|
lea edx, [edx + 8]
|
||||||
jg wloop
|
jg wloop
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1352,25 +1352,25 @@ static int TestSobel(int width, int height, int benchmark_iterations,
|
|||||||
TEST_F(libyuvTest, ARGBSobel_Any) {
|
TEST_F(libyuvTest, ARGBSobel_Any) {
|
||||||
int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_,
|
int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 0);
|
benchmark_iterations_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobel_Unaligned) {
|
TEST_F(libyuvTest, ARGBSobel_Unaligned) {
|
||||||
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 1);
|
benchmark_iterations_, +1, 1);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobel_Invert) {
|
TEST_F(libyuvTest, ARGBSobel_Invert) {
|
||||||
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, -1, 0);
|
benchmark_iterations_, -1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobel_Opt) {
|
TEST_F(libyuvTest, ARGBSobel_Opt) {
|
||||||
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 0);
|
benchmark_iterations_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int TestSobelXY(int width, int height, int benchmark_iterations,
|
static int TestSobelXY(int width, int height, int benchmark_iterations,
|
||||||
@ -1415,25 +1415,25 @@ static int TestSobelXY(int width, int height, int benchmark_iterations,
|
|||||||
TEST_F(libyuvTest, ARGBSobelXY_Any) {
|
TEST_F(libyuvTest, ARGBSobelXY_Any) {
|
||||||
int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
|
int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 0);
|
benchmark_iterations_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobelXY_Unaligned) {
|
TEST_F(libyuvTest, ARGBSobelXY_Unaligned) {
|
||||||
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 1);
|
benchmark_iterations_, +1, 1);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobelXY_Invert) {
|
TEST_F(libyuvTest, ARGBSobelXY_Invert) {
|
||||||
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, -1, 0);
|
benchmark_iterations_, -1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, ARGBSobelXY_Opt) {
|
TEST_F(libyuvTest, ARGBSobelXY_Opt) {
|
||||||
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_, +1, 0);
|
benchmark_iterations_, +1, 0);
|
||||||
EXPECT_LE(max_diff, 14);
|
EXPECT_EQ(0, max_diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -61,4 +61,3 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
fclose(fin1);
|
fclose(fin1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user