mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-09 11:16:43 +08:00
HalfFloat neon armv7 fix for destination pointer.
Improved unittests detect differences in arm64 rounding. TEST=util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=*Half* -a "--libyuv_width=640 --libyuv_height=360" BUG=libyuv:560 R=wangcheng@google.com Review URL: https://codereview.chromium.org/2478313004 .
This commit is contained in:
parent
eca08525cb
commit
f2c27dafa2
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1633
|
Version: 1634
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1633
|
#define LIBYUV_VERSION 1634
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -2742,16 +2742,16 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
|
|||||||
MEMACCESS(0)
|
MEMACCESS(0)
|
||||||
"vld1.8 {q1}, [%0]! \n" // load 8 shorts
|
"vld1.8 {q1}, [%0]! \n" // load 8 shorts
|
||||||
"subs %2, %2, #8 \n" // 8 pixels per loop
|
"subs %2, %2, #8 \n" // 8 pixels per loop
|
||||||
"vmovl.u8 q2, d2 \n" // 8 int's
|
"vmovl.u16 q2, d2 \n" // 8 int's
|
||||||
"vmovl.u8 q3, d3 \n"
|
"vmovl.u16 q3, d3 \n"
|
||||||
"vcvt.f32.u32 q2, q2 \n" // 8 floats
|
"vcvt.f32.u32 q2, q2 \n" // 8 floats
|
||||||
"vcvt.f32.u32 q3, q3 \n"
|
"vcvt.f32.u32 q3, q3 \n"
|
||||||
"vmul.f32 q2, q2, q0 \n" // adjust exponent
|
"vmul.f32 q2, q2, q0 \n" // adjust exponent
|
||||||
"vmul.f32 q3, q3, q0 \n"
|
"vmul.f32 q3, q3, q0 \n"
|
||||||
"vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
|
"vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
|
||||||
"vqshrn.u32 d3, q3, #13 \n"
|
"vqshrn.u32 d3, q3, #13 \n"
|
||||||
MEMACCESS(1)
|
MEMACCESS(1)
|
||||||
"vst1.8 {q1}, [%0]! \n"
|
"vst1.8 {q1}, [%1]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
@ -2770,16 +2770,16 @@ void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
|
|||||||
MEMACCESS(0)
|
MEMACCESS(0)
|
||||||
"vld1.8 {q1}, [%0]! \n" // load 8 shorts
|
"vld1.8 {q1}, [%0]! \n" // load 8 shorts
|
||||||
"subs %2, %2, #8 \n" // 8 pixels per loop
|
"subs %2, %2, #8 \n" // 8 pixels per loop
|
||||||
"vmovl.u8 q2, d2 \n" // 8 int's
|
"vmovl.u16 q2, d2 \n" // 8 int's
|
||||||
"vmovl.u8 q3, d3 \n"
|
"vmovl.u16 q3, d3 \n"
|
||||||
"vcvt.f32.u32 q2, q2 \n" // 8 floats
|
"vcvt.f32.u32 q2, q2 \n" // 8 floats
|
||||||
"vcvt.f32.u32 q3, q3 \n"
|
"vcvt.f32.u32 q3, q3 \n"
|
||||||
"vmul.f32 q2, q2, q0 \n" // adjust exponent
|
"vmul.f32 q2, q2, q0 \n" // adjust exponent
|
||||||
"vmul.f32 q3, q3, q0 \n"
|
"vmul.f32 q3, q3, q0 \n"
|
||||||
"vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
|
"vqshrn.u32 d2, q2, #13 \n" // isolate halffloat
|
||||||
"vqshrn.u32 d3, q3, #13 \n"
|
"vqshrn.u32 d3, q3, #13 \n"
|
||||||
MEMACCESS(1)
|
MEMACCESS(1)
|
||||||
"vst1.8 {q1}, [%0]! \n"
|
"vst1.8 {q1}, [%1]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
|
|||||||
@ -2711,6 +2711,7 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Caveat - rounds float to half float whereas scaling version truncates.
|
||||||
void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
|
void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -2721,7 +2722,7 @@ void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
|
|||||||
"uxtl2 v3.4s, v1.8h \n"
|
"uxtl2 v3.4s, v1.8h \n"
|
||||||
"scvtf v2.4s, v2.4s \n" // 8 floats
|
"scvtf v2.4s, v2.4s \n" // 8 floats
|
||||||
"scvtf v3.4s, v3.4s \n"
|
"scvtf v3.4s, v3.4s \n"
|
||||||
"fcvtn v1.4h, v2.4s \n" // 8 floatsgit
|
"fcvtn v1.4h, v2.4s \n" // 8 half floats
|
||||||
"fcvtn2 v1.8h, v3.4s \n"
|
"fcvtn2 v1.8h, v3.4s \n"
|
||||||
MEMACCESS(1)
|
MEMACCESS(1)
|
||||||
"st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
|
"st1 {v1.16b}, [%1], #16 \n" // store 8 shorts
|
||||||
|
|||||||
@ -2120,26 +2120,61 @@ int TestHalfFloatPlane(int benchmark_width, int benchmark_height,
|
|||||||
}
|
}
|
||||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||||
|
|
||||||
int diff = 0;
|
int max_diff = 0;
|
||||||
for (i = 0; i < y_plane_size; ++i) {
|
for (i = 0; i < y_plane_size / 2; ++i) {
|
||||||
diff = dst_c[i] - dst_opt[i];
|
int abs_diff =
|
||||||
if (diff) break;
|
abs(static_cast<int>(reinterpret_cast<uint16*>(dst_c)[i]) -
|
||||||
|
static_cast<int>(reinterpret_cast<uint16*>(dst_opt)[i]));
|
||||||
|
if (abs_diff > max_diff) {
|
||||||
|
max_diff = abs_diff;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free_aligned_buffer_page_end(orig_y);
|
free_aligned_buffer_page_end(orig_y);
|
||||||
return diff;
|
return max_diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__arm__)
|
||||||
|
static void EnableFlushDenormalToZero(void) {
|
||||||
|
uint32_t cw;
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"vmrs %0, fpscr \n"
|
||||||
|
"orr %0, %0, #0x1000000 \n"
|
||||||
|
"vmsr fpscr, %0 \n"
|
||||||
|
: "=r"(cw) :: "memory");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
|
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
|
||||||
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
|
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
|
||||||
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
|
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
|
||||||
#define MAXHALFDIFF 0
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
|
||||||
|
// 32 bit arm rounding on denormal case is off by 1 compared to C.
|
||||||
|
#if defined(__arm__)
|
||||||
|
EnableFlushDenormalToZero();
|
||||||
|
#endif
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
benchmark_iterations_,
|
benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
1.0f / 65536.0f, 65535);
|
1.0f / 65536.0f, 65535);
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
EXPECT_EQ(0, diff);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f, 65535);
|
||||||
|
EXPECT_LE(diff, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f / 4096.0f, 65535);
|
||||||
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
|
||||||
@ -2147,7 +2182,7 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
|
|||||||
benchmark_iterations_,
|
benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
1.0f / 1024.0f, 1023);
|
1.0f / 1024.0f, 1023);
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
|
||||||
@ -2155,7 +2190,7 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
|
|||||||
benchmark_iterations_,
|
benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
1.0f / 512.0f, 511);
|
1.0f / 512.0f, 511);
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
||||||
@ -2163,15 +2198,7 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
|||||||
benchmark_iterations_,
|
benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
1.0f / 4096.0f, 4095);
|
1.0f / 4096.0f, 4095);
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
|
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
|
||||||
benchmark_iterations_,
|
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
|
||||||
1.0f, 4095);
|
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
|
||||||
@ -2179,9 +2206,24 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
|
|||||||
benchmark_iterations_,
|
benchmark_iterations_,
|
||||||
disable_cpu_flags_, benchmark_cpu_info_,
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
1.0f / 4095.0f, 4095);
|
1.0f / 4095.0f, 4095);
|
||||||
EXPECT_LE(diff, MAXHALFDIFF);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f, 2047);
|
||||||
|
EXPECT_EQ(0, diff);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f, 4095);
|
||||||
|
EXPECT_LE(diff, 1);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
|
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
|
||||||
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user