mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Reintroduce the max version of scale
Add ScaleMaxSamples_NEON function, with the max computed on the original (unscaled) values. TBR=kjellander@chromium.org BUG=libyuv:717 TEST=LibYUVPlanarTest.TestScaleMaxSamples_Opt Change-Id: Id99338860782b10ffd24f66242eb42014c2e229e Reviewed-on: https://chromium-review.googlesource.com/614685 Reviewed-by: Frank Barchard <fbarchard@google.com> Reviewed-by: Cheng Wang <wangcheng@google.com>
This commit is contained in:
parent
dbd7c1a9c5
commit
56bbcdf422
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1665
|
||||
Version: 1666
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -3178,6 +3178,11 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
|
||||
const uint8* luma,
|
||||
uint32 lumacoeff);
|
||||
|
||||
// Writes src[i] * scale to dst[i] for |width| samples and returns the largest
// unscaled source sample; the running max starts at 0.f, so the result is
// never below 0.f.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width);
|
||||
// NEON assembly counterpart of ScaleMaxSamples_C (same parameters and return
// value). Its loop consumes 8 samples per iteration.
float ScaleMaxSamples_NEON(const float* src,
|
||||
float* dst,
|
||||
float scale,
|
||||
int width);
|
||||
// Writes src[i] * scale to dst[i] for |width| samples and returns the sum of
// the squares of the unscaled source samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width);
|
||||
float ScaleSumSamples_NEON(const float* src,
|
||||
float* dst,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1665
|
||||
#define LIBYUV_VERSION 1666
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -2640,16 +2640,28 @@ void NV12ToRGB565Row_AVX2(const uint8* src_y,
|
||||
#endif
|
||||
|
||||
// Scales a row of float samples and accumulates their energy.
//   src   - input samples; |width| floats are read.
//   dst   - output samples; dst[i] receives src[i] * scale.
//   scale - multiplier applied to every sample.
//   width - number of samples to process. When width <= 0 nothing is read or
//           written and 0.f is returned.
// Returns the sum of the squares of the original (unscaled) samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
// Hint clang to vectorize the reduction four floats at a time.
#if defined(__clang__)
#pragma clang loop vectorize_width(4)
#endif
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;  // Energy is taken from the unscaled value.
    *dst++ = v * scale;
  }
  return fsum;
}
|
||||
|
||||
// Scales a row of float samples and tracks the largest input value.
//   src   - input samples; |width| floats are read.
//   dst   - output samples; dst[i] receives src[i] * scale.
//   scale - multiplier applied to every sample.
//   width - number of samples to process. When width <= 0 nothing is read or
//           written and 0.f is returned.
// Returns the maximum of the original (unscaled) samples. The running max
// starts at 0.f, so input with no positive sample yields 0.f.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float fmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    fmax = (v > fmax) ? v : fmax;  // Compare the unscaled value.
    *dst++ = vs;
  }
  return fmax;
}
|
||||
|
||||
|
||||
@ -2612,6 +2612,36 @@ void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
|
||||
: "cc", "memory", "v1", "v2", "v3");
|
||||
}
|
||||
|
||||
// AArch64 NEON version of ScaleMaxSamples: writes src[i] * scale to dst[i] and
// returns the maximum of the unscaled source samples. Both max accumulators
// start at zero, so the result is 0 when no sample is positive.
// Each loop iteration loads, scales and stores 8 floats, and the loop body
// runs before the remaining count is tested, so this presumably expects width
// to be a positive multiple of 8 - NOTE(review): confirm against callers.
float ScaleMaxSamples_NEON(const float* src,
|
||||
float* dst,
|
||||
float scale,
|
||||
int width) {
|
||||
float fmax;
|
||||
asm volatile(
|
||||
"movi v5.4s, #0 \n" // max accumulator for the first 4 samples of each group
|
||||
"movi v6.4s, #0 \n" // max accumulator for the last 4 samples of each group
|
||||
|
||||
"1: \n"
|
||||
"ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
||||
"fmul v3.4s, v1.4s, %4.s[0] \n" // scale
|
||||
"fmul v4.4s, v2.4s, %4.s[0] \n" // scale
|
||||
"fmax v5.4s, v5.4s, v1.4s \n" // max of the unscaled samples
|
||||
"fmax v6.4s, v6.4s, v2.4s \n" // max of the unscaled samples
|
||||
"st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples
|
||||
"b.gt 1b \n" // loop while the remaining count is > 0
|
||||
"fmax v5.4s, v5.4s, v6.4s \n" // merge the two accumulators
|
||||
"fmaxv %s3, v5.4s \n" // signed max across the accumulator lanes
|
||||
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width), // %2
|
||||
"=w"(fmax) // %3
|
||||
: "w"(scale) // %4
|
||||
: "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6");
|
||||
return fmax;
|
||||
}
|
||||
|
||||
float ScaleSumSamples_NEON(const float* src,
|
||||
float* dst,
|
||||
float scale,
|
||||
|
||||
@ -2521,6 +2521,74 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
|
||||
free_aligned_buffer_page_end(dst_pixels_c);
|
||||
}
|
||||
|
||||
float TestScaleMaxSamples(int benchmark_width,
|
||||
int benchmark_height,
|
||||
int benchmark_iterations,
|
||||
float scale,
|
||||
bool opt) {
|
||||
int i, j;
|
||||
float max_c, max_opt = 0.f;
|
||||
const int y_plane_size = benchmark_width * benchmark_height * 4;
|
||||
|
||||
align_buffer_page_end(orig_y, y_plane_size * 3);
|
||||
uint8* dst_opt = orig_y + y_plane_size;
|
||||
uint8* dst_c = orig_y + y_plane_size * 2;
|
||||
|
||||
// Randomize works but may contain some denormals affecting performance.
|
||||
// MemRandomize(orig_y, y_plane_size);
|
||||
for (i = 0; i < y_plane_size / 4; ++i) {
|
||||
(reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
|
||||
}
|
||||
memset(dst_c, 0, y_plane_size);
|
||||
memset(dst_opt, 1, y_plane_size);
|
||||
|
||||
max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
|
||||
reinterpret_cast<float*>(dst_c), scale,
|
||||
benchmark_width * benchmark_height);
|
||||
|
||||
for (j = 0; j < benchmark_iterations; j++) {
|
||||
if (opt) {
|
||||
#ifdef HAS_SCALESUMSAMPLES_NEON
|
||||
max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
|
||||
reinterpret_cast<float*>(dst_opt), scale,
|
||||
benchmark_width * benchmark_height);
|
||||
#else
|
||||
max_opt = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
|
||||
reinterpret_cast<float*>(dst_opt), scale,
|
||||
benchmark_width * benchmark_height);
|
||||
#endif
|
||||
} else {
|
||||
max_opt = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
|
||||
reinterpret_cast<float*>(dst_opt), scale,
|
||||
benchmark_width * benchmark_height);
|
||||
}
|
||||
}
|
||||
|
||||
float max_diff = FAbs(max_opt - max_c);
|
||||
for (i = 0; i < y_plane_size / 4; ++i) {
|
||||
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
|
||||
(reinterpret_cast<float*>(dst_opt)[i]));
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(orig_y);
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// With opt=false the helper runs ScaleMaxSamples_C for both the reference and
// the path under test, so the reported difference must be exactly zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float max_error = TestScaleMaxSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, max_error);
}
|
||||
|
||||
// With opt=true the helper compares the optimized path (when one is compiled
// in) against ScaleMaxSamples_C and the results are required to match exactly.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float max_error = TestScaleMaxSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, max_error);
}
|
||||
|
||||
float TestScaleSumSamples(int benchmark_width,
|
||||
int benchmark_height,
|
||||
int benchmark_iterations,
|
||||
@ -2632,7 +2700,7 @@ float TestScaleSamples(int benchmark_width,
|
||||
}
|
||||
}
|
||||
|
||||
float max_diff =0.f;
|
||||
float max_diff = 0.f;
|
||||
for (i = 0; i < y_plane_size / 4; ++i) {
|
||||
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
|
||||
(reinterpret_cast<float*>(dst_opt)[i]));
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user