mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
UVScale down by 4: use SSSE3/NEON
Intel SkylakeX: Was UVScaleDownBy4_Box (7421 ms), Now UVScaleDownBy4_Box (2496 ms).
Pixel4: Was UVScaleDownBy4_Box (3510 ms), Now UVScaleDownBy4_Box (2797 ms).
Bug: libyuv:838
Change-Id: Ibbde56e497b0706fbcb7b5ec4a991d40ca17f861
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2469050
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
d730dc2f18
commit
725c64015d
@ -24,13 +24,28 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Macros to enable specialized scalers
|
// Macros to enable specialized scalers
|
||||||
|
|
||||||
|
#ifndef HAS_SCALEUVDOWN2
|
||||||
#define HAS_SCALEUVDOWN2 1
|
#define HAS_SCALEUVDOWN2 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_SCALEUVDOWN4BOX
|
||||||
#define HAS_SCALEUVDOWN4BOX 1
|
#define HAS_SCALEUVDOWN4BOX 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_SCALEUVDOWNEVEN
|
||||||
#define HAS_SCALEUVDOWNEVEN 1
|
#define HAS_SCALEUVDOWNEVEN 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_SCALEUVBILINEARDOWN
|
||||||
#define HAS_SCALEUVBILINEARDOWN 1
|
#define HAS_SCALEUVBILINEARDOWN 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_SCALEUVBILINEARUP
|
||||||
#define HAS_SCALEUVBILINEARUP 1
|
#define HAS_SCALEUVBILINEARUP 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_UVCOPY
|
||||||
#define HAS_UVCOPY 1
|
#define HAS_UVCOPY 1
|
||||||
|
#endif
|
||||||
|
#ifndef HAS_SCALEPLANEVERTICAL
|
||||||
#define HAS_SCALEPLANEVERTICAL 1
|
#define HAS_SCALEPLANEVERTICAL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
static __inline int Abs(int v) {
|
static __inline int Abs(int v) {
|
||||||
return v >= 0 ? v : -v;
|
return v >= 0 ? v : -v;
|
||||||
@ -39,7 +54,7 @@ static __inline int Abs(int v) {
|
|||||||
// ScaleUV, 1/2
|
// ScaleUV, 1/2
|
||||||
// This is an optimized version for scaling down a UV to 1/2 of
|
// This is an optimized version for scaling down a UV to 1/2 of
|
||||||
// its original size.
|
// its original size.
|
||||||
#ifdef HAS_SCALEUVDOWN2
|
#if HAS_SCALEUVDOWN2
|
||||||
static void ScaleUVDown2(int src_width,
|
static void ScaleUVDown2(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
@ -81,6 +96,15 @@ static void ScaleUVDown2(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON) && filtering) {
|
||||||
|
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
|
||||||
|
if (IS_ALIGNED(dst_width, 8)) {
|
||||||
|
ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// This code is not enabled. Only box filter is available at this time.
|
// This code is not enabled. Only box filter is available at this time.
|
||||||
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
|
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -98,14 +122,6 @@ static void ScaleUVDown2(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
|
|
||||||
if (TestCpuFlag(kCpuHasNEON) && filtering) {
|
|
||||||
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
|
|
||||||
if (IS_ALIGNED(dst_width, 8)) {
|
|
||||||
ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// This code is not enabled. Only box filter is available at this time.
|
// This code is not enabled. Only box filter is available at this time.
|
||||||
#if defined(HAS_SCALEUVROWDOWN2_NEON)
|
#if defined(HAS_SCALEUVROWDOWN2_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
@ -170,7 +186,7 @@ static void ScaleUVDown2(int src_width,
|
|||||||
// ScaleUV, 1/4
|
// ScaleUV, 1/4
|
||||||
// This is an optimized version for scaling down a UV to 1/4 of
|
// This is an optimized version for scaling down a UV to 1/4 of
|
||||||
// its original size.
|
// its original size.
|
||||||
#ifdef HAS_SCALEUVDOWN4BOX
|
#if HAS_SCALEUVDOWN4BOX
|
||||||
static void ScaleUVDown4Box(int src_width,
|
static void ScaleUVDown4Box(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
@ -198,15 +214,16 @@ static void ScaleUVDown4Box(int src_width,
|
|||||||
(void)dx;
|
(void)dx;
|
||||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||||
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
|
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
|
||||||
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
|
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
|
||||||
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
|
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
|
||||||
if (IS_ALIGNED(dst_width, 4)) {
|
if (IS_ALIGNED(dst_width, 4)) {
|
||||||
ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
|
ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEUVROWDOWN2_NEON)
|
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
|
ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
|
||||||
if (IS_ALIGNED(dst_width, 8)) {
|
if (IS_ALIGNED(dst_width, 8)) {
|
||||||
@ -230,7 +247,7 @@ static void ScaleUVDown4Box(int src_width,
|
|||||||
// ScaleUV Even
|
// ScaleUV Even
|
||||||
// This is an optimized version for scaling down a UV to even
|
// This is an optimized version for scaling down a UV to even
|
||||||
// multiple of its original size.
|
// multiple of its original size.
|
||||||
#ifdef HAS_SCALEUVDOWNEVEN
|
#if HAS_SCALEUVDOWNEVEN
|
||||||
static void ScaleUVDownEven(int src_width,
|
static void ScaleUVDownEven(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
@ -256,7 +273,7 @@ static void ScaleUVDownEven(int src_width,
|
|||||||
assert(IS_ALIGNED(src_height, 2));
|
assert(IS_ALIGNED(src_height, 2));
|
||||||
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
|
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
|
||||||
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
|
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
|
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
|
||||||
: ScaleUVRowDownEven_Any_SSSE3;
|
: ScaleUVRowDownEven_Any_SSSE3;
|
||||||
if (IS_ALIGNED(dst_width, 4)) {
|
if (IS_ALIGNED(dst_width, 4)) {
|
||||||
@ -308,7 +325,7 @@ static void ScaleUVDownEven(int src_width,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Scale UV down with bilinear interpolation.
|
// Scale UV down with bilinear interpolation.
|
||||||
#ifdef HAS_SCALEUVBILINEARDOWN
|
#if HAS_SCALEUVBILINEARDOWN
|
||||||
static void ScaleUVBilinearDown(int src_width,
|
static void ScaleUVBilinearDown(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
@ -426,7 +443,7 @@ static void ScaleUVBilinearDown(int src_width,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Scale UV up with bilinear interpolation.
|
// Scale UV up with bilinear interpolation.
|
||||||
#ifdef HAS_SCALEUVBILINEARUP
|
#if HAS_SCALEUVBILINEARUP
|
||||||
static void ScaleUVBilinearUp(int src_width,
|
static void ScaleUVBilinearUp(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
@ -513,7 +530,7 @@ static void ScaleUVBilinearUp(int src_width,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_SCALEUVCOLS_SSSE3)
|
#if defined(HAS_SCALEUVCOLS_SSSE3)
|
||||||
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
if (!filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||||
ScaleUVFilterCols = ScaleUVCols_SSSE3;
|
ScaleUVFilterCols = ScaleUVCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -544,7 +561,7 @@ static void ScaleUVBilinearUp(int src_width,
|
|||||||
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||||
ScaleUVFilterCols = ScaleUVColsUp2_C;
|
ScaleUVFilterCols = ScaleUVColsUp2_C;
|
||||||
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
|
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
|
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
|
||||||
ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
|
ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -631,7 +648,7 @@ static void ScaleUVSimple(int src_width,
|
|||||||
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
|
(src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
|
||||||
(void)src_height;
|
(void)src_height;
|
||||||
#if defined(HAS_SCALEUVCOLS_SSSE3)
|
#if defined(HAS_SCALEUVCOLS_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||||
ScaleUVCols = ScaleUVCols_SSSE3;
|
ScaleUVCols = ScaleUVCols_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -662,7 +679,7 @@ static void ScaleUVSimple(int src_width,
|
|||||||
if (src_width * 2 == dst_width && x < 0x8000) {
|
if (src_width * 2 == dst_width && x < 0x8000) {
|
||||||
ScaleUVCols = ScaleUVColsUp2_C;
|
ScaleUVCols = ScaleUVColsUp2_C;
|
||||||
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
|
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
|
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
|
||||||
ScaleUVCols = ScaleUVColsUp2_SSSE3;
|
ScaleUVCols = ScaleUVColsUp2_SSSE3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -681,7 +698,7 @@ static void ScaleUVSimple(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Copy UV with optional flipping
|
// Copy UV with optional flipping
|
||||||
#ifdef HAS_UVCOPY
|
#if HAS_UVCOPY
|
||||||
static int UVCopy(const uint8_t* src_UV,
|
static int UVCopy(const uint8_t* src_UV,
|
||||||
int src_stride_UV,
|
int src_stride_UV,
|
||||||
uint8_t* dst_UV,
|
uint8_t* dst_UV,
|
||||||
@ -758,7 +775,7 @@ static void ScaleUV(const uint8_t* src,
|
|||||||
} else {
|
} else {
|
||||||
// Optimized even scale down. ie 2, 4, 6, 8, 10x.
|
// Optimized even scale down. ie 2, 4, 6, 8, 10x.
|
||||||
if (!(dx & 0x10000) && !(dy & 0x10000)) {
|
if (!(dx & 0x10000) && !(dy & 0x10000)) {
|
||||||
#ifdef HAS_SCALEUVDOWN2
|
#if HAS_SCALEUVDOWN2
|
||||||
if (dx == 0x20000) {
|
if (dx == 0x20000) {
|
||||||
// Optimized 1/2 downsample.
|
// Optimized 1/2 downsample.
|
||||||
ScaleUVDown2(src_width, src_height, clip_width, clip_height,
|
ScaleUVDown2(src_width, src_height, clip_width, clip_height,
|
||||||
@ -767,7 +784,7 @@ static void ScaleUV(const uint8_t* src,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_SCALEUVDOWN4BOX
|
#if HAS_SCALEUVDOWN4BOX
|
||||||
if (dx == 0x40000 && filtering == kFilterBox) {
|
if (dx == 0x40000 && filtering == kFilterBox) {
|
||||||
// Optimized 1/4 box downsample.
|
// Optimized 1/4 box downsample.
|
||||||
ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
|
ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
|
||||||
@ -775,7 +792,7 @@ static void ScaleUV(const uint8_t* src,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_SCALEUVDOWNEVEN
|
#if HAS_SCALEUVDOWNEVEN
|
||||||
ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
|
ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
|
||||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||||
filtering);
|
filtering);
|
||||||
@ -804,7 +821,7 @@ static void ScaleUV(const uint8_t* src,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVBILINEARUP
|
#if HAS_SCALEUVBILINEARUP
|
||||||
if (filtering && dy < 65536) {
|
if (filtering && dy < 65536) {
|
||||||
ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
|
ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
|
||||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||||
@ -812,7 +829,7 @@ static void ScaleUV(const uint8_t* src,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_SCALEUVBILINEARDOWN
|
#if HAS_SCALEUVBILINEARDOWN
|
||||||
if (filtering) {
|
if (filtering) {
|
||||||
ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
|
ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
|
||||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user