mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-16 15:19:52 +08:00
Unattenuate AVX2
BUG=190 TEST=planar_test Review URL: https://webrtc-codereview.appspot.com/1112004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@577 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
d5ee3dc912
commit
3c7bb050bd
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 576
|
Version: 577
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -140,6 +140,7 @@ extern "C" {
|
|||||||
|
|
||||||
// Effects
|
// Effects
|
||||||
#define HAS_ARGBATTENUATEROW_AVX2
|
#define HAS_ARGBATTENUATEROW_AVX2
|
||||||
|
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1324,6 +1325,7 @@ void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
|
|||||||
extern uint32 fixed_invtbl8[256];
|
extern uint32 fixed_invtbl8[256];
|
||||||
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|
||||||
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|||||||
@ -145,11 +145,9 @@ LIBYUV_API
|
|||||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||||
const uint8* src_b, int stride_b,
|
const uint8* src_b, int stride_b,
|
||||||
int width, int height) {
|
int width, int height) {
|
||||||
|
|
||||||
if (stride_a == width && stride_b == width) {
|
if (stride_a == width && stride_b == width) {
|
||||||
return ComputeSumSquareError(src_a, src_b, width * height);
|
return ComputeSumSquareError(src_a, src_b, width * height);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
||||||
SumSquareError_C;
|
SumSquareError_C;
|
||||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||||
|
|||||||
@ -1085,6 +1085,14 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
|||||||
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
|
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
|
||||||
|
bool clear = false;
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
|
||||||
|
clear = true;
|
||||||
|
ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
// TODO(fbarchard): Neon version.
|
||||||
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
ARGBUnattenuateRow(src_argb, dst_argb, width);
|
ARGBUnattenuateRow(src_argb, dst_argb, width);
|
||||||
|
|||||||
@ -1528,7 +1528,7 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
// 8.16 fixed point inverse table
|
// 8.16 fixed point inverse table
|
||||||
#define T(a) 0x10000 / a
|
#define T(a) 0x10000 / a
|
||||||
uint32 fixed_invtbl8[256] = {
|
uint32 fixed_invtbl8[256] = {
|
||||||
0x0100, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
0xffff, 0xffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
||||||
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
||||||
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
||||||
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
||||||
|
|||||||
@ -4462,6 +4462,53 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBUNATTENUATEROW_SSE2
|
#endif // HAS_ARGBUNATTENUATEROW_SSE2
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
|
// Shuffle table duplicating alpha.
|
||||||
|
static const ulvec8 kUnattenShuffleAlpha_AVX2 = {
|
||||||
|
0u, 1u, 0u, 1u, 0u, 1u, 128u, 128u,
|
||||||
|
8u, 9u, 8u, 9u, 8u, 9u, 128u, 128u,
|
||||||
|
0u, 1u, 0u, 1u, 0u, 1u, 128u, 128u,
|
||||||
|
8u, 9u, 8u, 9u, 8u, 9u, 128u, 128u,
|
||||||
|
};
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
int width) {
|
||||||
|
__asm {
|
||||||
|
mov eax, [esp + 4] // src_argb
|
||||||
|
mov edx, [esp + 8] // dst_argb
|
||||||
|
mov ecx, [esp + 12] // width
|
||||||
|
sub edx, eax
|
||||||
|
vmovdqa ymm4, kUnattenShuffleAlpha_AVX2
|
||||||
|
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000
|
||||||
|
vpslld ymm5, ymm5, 24
|
||||||
|
|
||||||
|
align 16
|
||||||
|
convertloop:
|
||||||
|
vmovdqu ymm6, [eax] // read 8 pixels.
|
||||||
|
vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffffffff for gather.
|
||||||
|
vpsrld ymm2, ymm6, 24 // alpha in low 8 bits.
|
||||||
|
vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated.
|
||||||
|
vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated.
|
||||||
|
vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm7 // ymm7 cleared.
|
||||||
|
vpunpcklwd ymm2, ymm3, ymm7 // low 4 inverted alphas. mutated.
|
||||||
|
vpunpckhwd ymm3, ymm3, ymm7 // high 4 inverted alphas. mutated.
|
||||||
|
vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas
|
||||||
|
vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas
|
||||||
|
vpmulhuw ymm0, ymm0, ymm2 // rgb * ia
|
||||||
|
vpmulhuw ymm1, ymm1, ymm3 // rgb * ia
|
||||||
|
vpand ymm6, ymm6, ymm5 // isolate alpha
|
||||||
|
vpackuswb ymm0, ymm0, ymm1 // unmutated.
|
||||||
|
vpor ymm0, ymm0, ymm6 // copy original alpha
|
||||||
|
sub ecx, 8
|
||||||
|
vmovdqu [eax + edx], ymm0
|
||||||
|
lea eax, [eax + 32]
|
||||||
|
jg convertloop
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBGRAYROW_SSSE3
|
#ifdef HAS_ARGBGRAYROW_SSSE3
|
||||||
// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
|
// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
|
||||||
static const vec8 kARGBToGray = {
|
static const vec8 kARGBToGray = {
|
||||||
|
|||||||
150
source/scale.cc
150
source/scale.cc
@ -42,13 +42,7 @@ void SetUseReferenceImpl(bool use) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ScaleRowDown2Int also used by planar functions
|
// ScaleRowDown2Int also used by planar functions
|
||||||
|
// NEON downscalers with interpolation.
|
||||||
/**
|
|
||||||
* NEON downscalers with interpolation.
|
|
||||||
*
|
|
||||||
* Provided by Fritz Koenig
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||||
#define HAS_SCALEROWDOWN2_NEON
|
#define HAS_SCALEROWDOWN2_NEON
|
||||||
@ -98,13 +92,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
|||||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
int dst_width, int source_y_fraction);
|
int dst_width, int source_y_fraction);
|
||||||
|
|
||||||
/**
|
// SSE2 downscalers with interpolation.
|
||||||
* SSE2 downscalers with interpolation.
|
|
||||||
*
|
|
||||||
* Provided by Frank Barchard (fbarchard@google.com)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Constants for SSSE3 code
|
// Constants for SSSE3 code
|
||||||
#elif !defined(YUV_DISABLE_ASM) && \
|
#elif !defined(YUV_DISABLE_ASM) && \
|
||||||
(defined(_M_IX86) || defined(__i386__) || defined(__x86_64__))
|
(defined(_M_IX86) || defined(__i386__) || defined(__x86_64__))
|
||||||
@ -2630,13 +2618,10 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane, 1/2
|
||||||
* Scale plane, 1/2
|
// This is an optimized version for scaling down a plane to 1/2 of
|
||||||
*
|
// its original size.
|
||||||
* This is an optimized version for scaling down a plane to 1/2 of
|
|
||||||
* its original size.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -2676,12 +2661,10 @@ static void ScalePlaneDown2(int /* src_width */, int /* src_height */,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane, 1/4
|
||||||
* Scale plane, 1/4
|
// This is an optimized version for scaling down a plane to 1/4 of
|
||||||
*
|
// its original size.
|
||||||
* This is an optimized version for scaling down a plane to 1/4 of
|
|
||||||
* its original size.
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
|
static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -2717,13 +2700,10 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane, 1/8
|
||||||
* Scale plane, 1/8
|
// This is an optimized version for scaling down a plane to 1/8
|
||||||
*
|
// of its original size.
|
||||||
* This is an optimized version for scaling down a plane to 1/8
|
|
||||||
* of its original size.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
|
static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -2748,12 +2728,8 @@ static void ScalePlaneDown8(int /* src_width */, int /* src_height */,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane down, 3/4
|
||||||
* Scale plane down, 3/4
|
|
||||||
*
|
|
||||||
* Provided by Frank Barchard (fbarchard@google.com)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
|
static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -2839,23 +2815,22 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Scale plane, 3/8
|
// Scale plane, 3/8
|
||||||
*
|
// This is an optimized version for scaling down a plane to 3/8
|
||||||
* This is an optimized version for scaling down a plane to 3/8
|
// of its original size.
|
||||||
* of its original size.
|
//
|
||||||
*
|
// Uses box filters arranged like this
|
||||||
* Uses box filter arranges like this
|
// aaabbbcc -> abc
|
||||||
* aaabbbcc -> abc
|
// aaabbbcc def
|
||||||
* aaabbbcc def
|
// aaabbbcc ghi
|
||||||
* aaabbbcc ghi
|
// dddeeeff
|
||||||
* dddeeeff
|
// dddeeeff
|
||||||
* dddeeeff
|
// dddeeeff
|
||||||
* dddeeeff
|
// ggghhhii
|
||||||
* ggghhhii
|
// ggghhhii
|
||||||
* ggghhhii
|
// Boxes are 3x3, 2x3, 3x2 and 2x2
|
||||||
* Boxes are 3x3, 2x3, 3x2 and 2x2
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
|
static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -2991,15 +2966,14 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane down to any dimensions, with interpolation.
|
||||||
* Scale plane down to any dimensions, with interpolation.
|
// (boxfilter).
|
||||||
* (boxfilter).
|
//
|
||||||
*
|
// Same method as SimpleScale, which is fixed point, outputting
|
||||||
* Same method as SimpleScale, which is fixed point, outputting
|
// one pixel of destination using fixed point (16.16) to step
|
||||||
* one pixel of destination using fixed point (16.16) to step
|
// through source, sampling a box of pixels with simple
|
||||||
* through source, sampling a box of pixel with simple
|
// averaging.
|
||||||
* averaging.
|
|
||||||
*/
|
|
||||||
static void ScalePlaneBox(int src_width, int src_height,
|
static void ScalePlaneBox(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -3008,8 +2982,6 @@ static void ScalePlaneBox(int src_width, int src_height,
|
|||||||
assert(dst_height > 0);
|
assert(dst_height > 0);
|
||||||
int dx = (src_width << 16) / dst_width;
|
int dx = (src_width << 16) / dst_width;
|
||||||
int dy = (src_height << 16) / dst_height;
|
int dy = (src_height << 16) / dst_height;
|
||||||
// int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
|
|
||||||
// int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
|
|
||||||
int x = 0;
|
int x = 0;
|
||||||
int y = 0;
|
int y = 0;
|
||||||
int maxy = (src_height << 16);
|
int maxy = (src_height << 16);
|
||||||
@ -3063,9 +3035,8 @@ static void ScalePlaneBox(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane to/from any dimensions, with interpolation.
|
||||||
* Scale plane to/from any dimensions, with interpolation.
|
|
||||||
*/
|
|
||||||
static void ScalePlaneBilinearSimple(int src_width, int src_height,
|
static void ScalePlaneBilinearSimple(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -3104,10 +3075,9 @@ static void ScalePlaneBilinearSimple(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Scale plane to/from any dimensions, with bilinear
|
// Scale plane to/from any dimensions, with bilinear interpolation.
|
||||||
* interpolation.
|
|
||||||
*/
|
|
||||||
void ScalePlaneBilinear(int src_width, int src_height,
|
void ScalePlaneBilinear(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -3170,12 +3140,11 @@ void ScalePlaneBilinear(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane to/from any dimensions, without interpolation.
|
||||||
* Scale plane to/from any dimensions, without interpolation.
|
// Fixed point math is used for performance: The upper 16 bits
|
||||||
* Fixed point math is used for performance: The upper 16 bits
|
// of x and dx is the integer part of the source position and
|
||||||
* of x and dx is the integer part of the source position and
|
// the lower 16 bits are the fixed decimal part.
|
||||||
* the lower 16 bits are the fixed decimal part.
|
|
||||||
*/
|
|
||||||
static void ScalePlaneSimple(int src_width, int src_height,
|
static void ScalePlaneSimple(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -3197,9 +3166,8 @@ static void ScalePlaneSimple(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane to/from any dimensions.
|
||||||
* Scale plane to/from any dimensions.
|
|
||||||
*/
|
|
||||||
static void ScalePlaneAnySize(int src_width, int src_height,
|
static void ScalePlaneAnySize(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -3215,14 +3183,12 @@ static void ScalePlaneAnySize(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// Scale plane down, any size
|
||||||
* Scale plane down, any size
|
//
|
||||||
*
|
// This is an optimized version for scaling down a plane to any size.
|
||||||
* This is an optimized version for scaling down a plane to any size.
|
// The current implementation is ~10 times faster compared to the
|
||||||
* The current implementation is ~10 times faster compared to the
|
// reference implementation for e.g. XGA->LowResPAL
|
||||||
* reference implementation for e.g. XGA->LowResPAL
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScalePlaneDown(int src_width, int src_height,
|
static void ScalePlaneDown(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
|
|||||||
@ -44,10 +44,6 @@ void ScaleARGBFilterRows_NEON(uint8* dst_ptr,
|
|||||||
int dst_width, int source_y_fraction);
|
int dst_width, int source_y_fraction);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
|
||||||
* SSE2 downscalers with bilinear interpolation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
||||||
|
|
||||||
#define HAS_SCALEARGBROWDOWN2_SSE2
|
#define HAS_SCALEARGBROWDOWN2_SSE2
|
||||||
@ -880,13 +876,10 @@ void ScaleARGBFilterRows_C(uint8* dst_argb, const uint8* src_argb,
|
|||||||
dst_argb[3] = dst_argb[-1];
|
dst_argb[3] = dst_argb[-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// ScaleARGB ARGB, 1/2
|
||||||
* ScaleARGB ARGB, 1/2
|
// This is an optimized version for scaling down an ARGB to 1/2 of
|
||||||
*
|
// its original size.
|
||||||
* This is an optimized version for scaling down a ARGB to 1/2 of
|
|
||||||
* its original size.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -918,13 +911,10 @@ static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// ScaleARGB ARGB Even
|
||||||
* ScaleARGB ARGB Even
|
// This is an optimized version for scaling down an ARGB to an even
|
||||||
*
|
// multiple of its original size.
|
||||||
* This is an optimized version for scaling down a ARGB to even
|
|
||||||
* multiple of its original size.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static void ScaleARGBDownEven(int src_width, int src_height,
|
static void ScaleARGBDownEven(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
@ -959,10 +949,9 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
|||||||
dst_argb += dst_stride;
|
dst_argb += dst_stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* ScaleARGB ARGB to/from any dimensions, with bilinear
|
// ScaleARGB ARGB to/from any dimensions, with bilinear
|
||||||
* interpolation.
|
// interpolation.
|
||||||
*/
|
|
||||||
|
|
||||||
// Maximum width handled by 2 pass Bilinear.
|
// Maximum width handled by 2 pass Bilinear.
|
||||||
static const int kMaxInputWidth = 2560;
|
static const int kMaxInputWidth = 2560;
|
||||||
@ -1033,12 +1022,11 @@ static void ScaleARGBCols(uint8* dst_argb, const uint8* src_argb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* ScaleARGB ARGB to/from any dimensions, without interpolation.
|
// ScaleARGB ARGB to/from any dimensions, without interpolation.
|
||||||
* Fixed point math is used for performance: The upper 16 bits
|
// Fixed point math is used for performance: The upper 16 bits
|
||||||
* of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
* the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
*/
|
|
||||||
|
|
||||||
static void ScaleARGBSimple(int src_width, int src_height,
|
static void ScaleARGBSimple(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
@ -1056,9 +1044,8 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
// ScaleARGB ARGB to/from any dimensions.
|
||||||
* ScaleARGB ARGB to/from any dimensions.
|
|
||||||
*/
|
|
||||||
static void ScaleARGBAnySize(int src_width, int src_height,
|
static void ScaleARGBAnySize(int src_width, int src_height,
|
||||||
int dst_width, int dst_height,
|
int dst_width, int dst_height,
|
||||||
int src_stride, int dst_stride,
|
int src_stride, int dst_stride,
|
||||||
|
|||||||
@ -167,6 +167,72 @@ TEST_F(libyuvTest, ARGBAttenuate_Opt) {
|
|||||||
EXPECT_LE(max_diff, 2);
|
EXPECT_LE(max_diff, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int TestUnattenuateI(int width, int height, int benchmark_iterations,
|
||||||
|
int invert, int off) {
|
||||||
|
const int kBpp = 4;
|
||||||
|
const int kStride = (width * kBpp + 15) & ~15;
|
||||||
|
align_buffer_64(src_argb, kStride * height + off);
|
||||||
|
align_buffer_64(dst_argb_c, kStride * height);
|
||||||
|
align_buffer_64(dst_argb_opt, kStride * height);
|
||||||
|
srandom(time(NULL));
|
||||||
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
|
src_argb[i + off] = (random() & 0xff);
|
||||||
|
}
|
||||||
|
ARGBAttenuate(src_argb + off, kStride,
|
||||||
|
src_argb + off, kStride,
|
||||||
|
width, height);
|
||||||
|
memset(dst_argb_c, 0, kStride * height);
|
||||||
|
memset(dst_argb_opt, 0, kStride * height);
|
||||||
|
|
||||||
|
MaskCpuFlags(0);
|
||||||
|
ARGBUnattenuate(src_argb + off, kStride,
|
||||||
|
dst_argb_c, kStride,
|
||||||
|
width, invert * height);
|
||||||
|
MaskCpuFlags(-1);
|
||||||
|
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||||
|
ARGBUnattenuate(src_argb + off, kStride,
|
||||||
|
dst_argb_opt, kStride,
|
||||||
|
width, invert * height);
|
||||||
|
}
|
||||||
|
int max_diff = 0;
|
||||||
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
|
int abs_diff =
|
||||||
|
abs(static_cast<int>(dst_argb_c[i]) -
|
||||||
|
static_cast<int>(dst_argb_opt[i]));
|
||||||
|
if (abs_diff > max_diff) {
|
||||||
|
max_diff = abs_diff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free_aligned_buffer_64(src_argb)
|
||||||
|
free_aligned_buffer_64(dst_argb_c)
|
||||||
|
free_aligned_buffer_64(dst_argb_opt)
|
||||||
|
return max_diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(libyuvTest, ARGBUnattenuate_Any) {
|
||||||
|
int max_diff = TestUnattenuateI(benchmark_width_ - 1, benchmark_height_,
|
||||||
|
benchmark_iterations_, +1, 0);
|
||||||
|
EXPECT_LE(max_diff, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(libyuvTest, ARGBUnattenuate_Unaligned) {
|
||||||
|
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_, +1, 1);
|
||||||
|
EXPECT_LE(max_diff, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(libyuvTest, ARGBUnattenuate_Invert) {
|
||||||
|
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_, -1, 0);
|
||||||
|
EXPECT_LE(max_diff, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(libyuvTest, ARGBUnattenuate_Opt) {
|
||||||
|
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_, +1, 0);
|
||||||
|
EXPECT_LE(max_diff, 2);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, TestARGBComputeCumulativeSum) {
|
TEST_F(libyuvTest, TestARGBComputeCumulativeSum) {
|
||||||
SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
|
SIMD_ALIGNED(uint8 orig_pixels[16][16][4]);
|
||||||
SIMD_ALIGNED(int32 added_pixels[16][16][4]);
|
SIMD_ALIGNED(int32 added_pixels[16][16][4]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user