mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
FixedDiv1 using a single 64/32 divide. Removes size restriction from slope.
BUG=302 TESTED=libyuv scale tests R=tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/6489004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@940 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
277378723a
commit
5dba58cb1e
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 939
|
||||
Version: 941
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale.h"
|
||||
#include "libyuv/scale_argb.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
#include "libyuv/version.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
|
||||
@ -104,7 +104,6 @@ extern "C" {
|
||||
#define HAS_COPYROW_ERMS
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_COPYROW_X86
|
||||
#define HAS_FIXEDDIV_X86
|
||||
#define HAS_HALFROW_SSE2
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
@ -1684,15 +1683,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||
int width, const uint8* luma,
|
||||
const uint32 lumacoeff);
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
// FixedDiv_C and FixedDiv_X86 share one signature; the FixedDiv macro below
// picks between them at compile time.
|
||||
int FixedDiv_C(int num, int div);
|
||||
int FixedDiv_X86(int num, int div);
|
||||
// Callers use FixedDiv; it maps to the X86 version when HAS_FIXEDDIV_X86 is
// defined and to the C version otherwise.
#ifdef HAS_FIXEDDIV_X86
|
||||
#define FixedDiv FixedDiv_X86
|
||||
#else
|
||||
#define FixedDiv FixedDiv_C
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
@ -33,6 +33,8 @@ extern "C" {
|
||||
#define HAS_SCALEARGBCOLS_SSE2
|
||||
#define HAS_SCALEARGBFILTERCOLS_SSSE3
|
||||
#define HAS_SCALEARGBCOLSUP2_SSE2
|
||||
#define HAS_FIXEDDIV_X86
|
||||
#define HAS_FIXEDDIV1_X86
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
@ -61,17 +63,31 @@ void ScalePlaneVertical(int src_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int y, int dy,
|
||||
int bpp, FilterMode filtering);
|
||||
int bpp, enum FilterMode filtering);
|
||||
|
||||
// Simplify the filtering based on scale factors.
|
||||
FilterMode ScaleFilterReduce(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode filtering);
|
||||
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div);

// Callers use FixedDiv / FixedDiv1. Each name is selected by its own feature
// macro so that a build which provides only one of the X86 versions still
// resolves the other name to the portable C version.
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#else
#define FixedDiv FixedDiv_C
#endif
#ifdef HAS_FIXEDDIV1_X86
#define FixedDiv1 FixedDiv1_X86
#else
#define FixedDiv1 FixedDiv1_C
#endif
|
||||
|
||||
// Compute slope values for stepping.
|
||||
void ScaleSlope(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode filtering,
|
||||
enum FilterMode filtering,
|
||||
int* x, int* y, int* dx, int* dy);
|
||||
|
||||
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 939
|
||||
#define LIBYUV_VERSION 941
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -59,11 +59,6 @@ static __inline uint32 Abs(int32 v) {
|
||||
}
|
||||
#endif // USE_BRANCHLESS
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
|
||||
int FixedDiv_C(int num, int div) {
|
||||
return static_cast<int>((static_cast<int64>(num) << 16) / div);
|
||||
}
|
||||
|
||||
#ifdef LIBYUV_LITTLE_ENDIAN
|
||||
#define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
|
||||
#else
|
||||
|
||||
@ -6170,23 +6170,6 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
|
||||
}
|
||||
#endif // HAS_I422TOUYVYROW_SSE2
|
||||
|
||||
#ifdef HAS_FIXEDDIV_X86
|
||||
// Divide num by div and return as 16.16 fixed point result.
// num is bound to eax and div to ecx. The asm builds the 64 bit value
// num << 16 in edx:eax and divides it by div with a single idiv; the quotient
// is left in eax and returned through num.
|
||||
int FixedDiv_X86(int num, int div) {
|
||||
asm volatile (
|
||||
"cdq \n"  // sign extend eax into edx:eax
|
||||
"shld $0x10,%%eax,%%edx \n"  // edx = high half of (edx:eax << 16)
|
||||
"shl $0x10,%%eax \n"  // eax = low half of (edx:eax << 16)
|
||||
"idiv %1 \n"  // eax = edx:eax / div
|
||||
"mov %0, %%eax \n"  // %0 is already eax (constraint "+a")
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx"
|
||||
);
|
||||
return num;
|
||||
}
|
||||
#endif // HAS_FIXEDDIV_X86
|
||||
|
||||
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
|
||||
@ -7009,21 +7009,6 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_FIXEDDIV_X86
|
||||
// Divide num by div and return as 16.16 fixed point result.
// Declared naked, so the body reads both arguments straight from the stack
// and executes ret itself; the quotient produced by idiv is left in eax.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
int FixedDiv_X86(int num, int div) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // num
|
||||
cdq // extend num to 64 bits
|
||||
shld edx, eax, 16 // 32.16
|
||||
shl eax, 16
|
||||
// edx:eax now holds num << 16; divide it by div ([esp + 8]).
idiv dword ptr [esp + 8]
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_FIXEDDIV_X86
|
||||
|
||||
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||
|
||||
@ -584,9 +584,18 @@ FilterMode ScaleFilterReduce(int src_width, int src_height,
|
||||
return filtering;
|
||||
}
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
|
||||
int FixedDiv_C(int num, int div) {
|
||||
return static_cast<int>((static_cast<int64>(num) << 16) / div);
|
||||
}
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
|
||||
int FixedDiv1_C(int num, int div) {
|
||||
return static_cast<int>(((static_cast<int64>(num) << 16) - 0x00010001) /
|
||||
(div - 1));
|
||||
}
|
||||
|
||||
// Returns half of the step dx plus the offset s, computed on the magnitude of
// dx and negated when dx is negative.
// Both arguments and the whole expansion are parenthesized so the macro keeps
// its meaning with expression arguments and inside larger expressions.
// Note that dx and s are evaluated more than once.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
|
||||
// Divides (src << 16) - 0x00010001 by (dst << 16) - 0x00010000 using FixedDiv.
// Both shifts are done in int, so src and dst must be small enough that
// shifting them left by 16 does not overflow.
// The expansion has no trailing semicolon and parenthesizes its arguments, so
// it can be used as an ordinary expression.
#define FIXEDDIV1(src, dst) \
  FixedDiv(((src) << 16) - 0x00010001, ((dst) << 16) - 0x00010000)
|
||||
|
||||
// Compute slope values for stepping.
|
||||
void ScaleSlope(int src_width, int src_height,
|
||||
@ -613,14 +622,14 @@ void ScaleSlope(int src_width, int src_height,
|
||||
*dx = FixedDiv(Abs(src_width), dst_width);
|
||||
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
|
||||
} else if (dst_width > 1) {
|
||||
*dx = FIXEDDIV1(Abs(src_width), dst_width);
|
||||
*dx = FixedDiv1(Abs(src_width), dst_width);
|
||||
*x = 0;
|
||||
}
|
||||
if (dst_height <= src_height) {
|
||||
*dy = FixedDiv(src_height, dst_height);
|
||||
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
|
||||
} else if (dst_height > 1) {
|
||||
*dy = FIXEDDIV1(src_height, dst_height);
|
||||
*dy = FixedDiv1(src_height, dst_height);
|
||||
*y = 0;
|
||||
}
|
||||
} else if (filtering == kFilterLinear) {
|
||||
@ -629,7 +638,7 @@ void ScaleSlope(int src_width, int src_height,
|
||||
*dx = FixedDiv(Abs(src_width), dst_width);
|
||||
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
|
||||
} else if (dst_width > 1) {
|
||||
*dx = FIXEDDIV1(Abs(src_width), dst_width);
|
||||
*dx = FixedDiv1(Abs(src_width), dst_width);
|
||||
*x = 0;
|
||||
}
|
||||
*dy = FixedDiv(src_height, dst_height);
|
||||
@ -649,7 +658,6 @@ void ScaleSlope(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#undef CENTERSTART
|
||||
#undef FIXEDDIV1
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -1274,6 +1274,39 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
// num is bound to eax and div to ecx. The asm builds the 64 bit value
// num << 16 in edx:eax and divides it by div with a single idiv; the quotient
// is left in eax and returned through num.
|
||||
int FixedDiv_X86(int num, int div) {
|
||||
asm volatile (
|
||||
"cdq \n"  // sign extend eax into edx:eax
|
||||
"shld $0x10,%%eax,%%edx \n"  // edx = high half of (edx:eax << 16)
|
||||
"shl $0x10,%%eax \n"  // eax = low half of (edx:eax << 16)
|
||||
"idiv %1 \n"  // eax = edx:eax / div
|
||||
"mov %0, %%eax \n"  // %0 is already eax (constraint "+a")
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx"
|
||||
);
|
||||
return num;
|
||||
}
|
||||
|
||||
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// Computes ((num << 16) - 0x00010001) / (div - 1) with one 64/32 bit idiv.
// num is bound to eax and div to ecx. div is declared as a read-write operand
// because the asm decrements it in place; an input-only operand must not be
// modified by the asm. The divisor is div - 1, so div must not be 1.
int FixedDiv1_X86(int num, int div) {
  asm volatile (
    "cdq                                       \n"  // edx:eax = (int64)num
    "shld      $0x10,%%eax,%%edx               \n"  // edx:eax <<= 16 (high)
    "shl       $0x10,%%eax                     \n"  // edx:eax <<= 16 (low)
    "sub       $0x10001,%%eax                  \n"  // edx:eax -= 0x00010001
    "sbb       $0x0,%%edx                      \n"  // propagate the borrow
    "sub       $0x1,%1                         \n"  // div - 1
    "idiv      %1                              \n"  // eax = edx:eax / (div - 1)
  : "+a"(num),  // %0
    "+c"(div)   // %1
  :
  : "memory", "cc", "edx"
  );
  return num;
}
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -1281,6 +1281,36 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
// Declared naked, so the body reads both arguments straight from the stack
// and executes ret itself; the quotient produced by idiv is left in eax.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
int FixedDiv_X86(int num, int div) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // num
|
||||
cdq // extend num to 64 bits
|
||||
shld edx, eax, 16 // 32.16
|
||||
shl eax, 16
|
||||
// edx:eax now holds num << 16; divide it by div ([esp + 8]).
idiv dword ptr [esp + 8]
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// Computes ((num << 16) - 0x00010001) / (div - 1) with a single idiv.
// Declared naked, so the body reads both arguments straight from the stack
// and executes ret itself; the quotient produced by idiv is left in eax.
|
||||
__declspec(naked) __declspec(align(16))
|
||||
int FixedDiv1_X86(int num, int div) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // num
|
||||
mov ecx, [esp + 8] // denom
|
||||
cdq // extend num to 64 bits
|
||||
shld edx, eax, 16 // 32.16
|
||||
shl eax, 16
|
||||
// Subtract 0x00010001 from the 64 bit value in edx:eax; sbb carries the
// borrow from the low half into the high half.
sub eax, 0x00010001
|
||||
sbb edx, 0
|
||||
// Divisor is denom - 1.
sub ecx, 1
|
||||
idiv ecx
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -14,6 +14,8 @@
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
@ -27,7 +29,7 @@ TEST_F(libyuvTest, TestFixedDiv) {
|
||||
EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1));
|
||||
EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1));
|
||||
// TODO(fbarchard): Avoid the following that throw exceptions.
|
||||
// EXPECT_EQ(0x10000, libyuv::FixedDiv(0x10000, 1));
|
||||
// EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1));
|
||||
// EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1));
|
||||
|
||||
EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640));
|
||||
@ -118,4 +120,39 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) {
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises FixedDiv1 with 1280 random numerator/divisor pairs. The divide
// loop is repeated benchmark_pixels_div1280_ times, after which every result
// must be within 1 of the value FixedDiv1_C produces for the same inputs.
TEST_F(libyuvTest, TestFixedDiv1_Opt) {
  const int kNumValues = 1280;
  int numerators[kNumValues];
  int divisors[kNumValues];
  int opt_results[kNumValues];
  int c_results[kNumValues];

  srandom(time(NULL));
  MemRandomize(reinterpret_cast<uint8*>(numerators), sizeof(numerators));
  MemRandomize(reinterpret_cast<uint8*>(divisors), sizeof(divisors));
  for (int k = 0; k < kNumValues; ++k) {
    numerators[k] &= 4095;  // Make numerator smaller.
    divisors[k] &= 4095;  // Make divisor smaller.
    // FixedDiv1 divides by div - 1, so divisors below 2 are replaced.
    if (divisors[k] < 2) {
      divisors[k] = 1280;
    }
  }

  const int has_x86 = TestCpuFlag(kCpuHasX86);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    if (!has_x86) {
      for (int k = 0; k < kNumValues; ++k) {
        opt_results[k] = libyuv::FixedDiv1_C(numerators[k], divisors[k]);
      }
    } else {
      for (int k = 0; k < kNumValues; ++k) {
        opt_results[k] = libyuv::FixedDiv1(numerators[k], divisors[k]);
      }
    }
  }

  // Compare against the C reference, allowing a difference of 1.
  for (int k = 0; k < kNumValues; ++k) {
    c_results[k] = libyuv::FixedDiv1_C(numerators[k], divisors[k]);
    EXPECT_NEAR(c_results[k], opt_results[k], 1);
  }
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user