Make the C clamp functions use comparisons instead of arithmetic shifts

Intel
Was ARGBSubtract_Opt (1760 ms)
Now ARGBSubtract_Opt (1546 ms)

ARM
Was ARGBAdd_Opt (1747 ms)
Now ARGBAdd_Opt (1260 ms)

Bug: None
Change-Id: I52436f6390b6b7313f2a8820833bb4f60ae958be
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2299639
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2020-07-16 12:10:46 -07:00 committed by Commit Bot
parent 1837f0022e
commit 6d603ec3f5
8 changed files with 47 additions and 43 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1762
Version: 1763
License: BSD
License File: LICENSE

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_

// Library version number. Defined exactly once: redefining an object-like
// macro with a different value is an incompatible redefinition.
#define LIBYUV_VERSION 1763

#endif  // INCLUDE_LIBYUV_VERSION_H_

View File

@ -39,19 +39,19 @@ extern "C" {
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Clamps v to a lower bound of 0: returns v when v >= 0, otherwise 0.
// (v >= 0) evaluates to 1 or 0, so negating it yields an all-ones or
// all-zero mask. Unlike a (-v >> 31) mask this neither negates v (which
// overflows for INT32_MIN) nor right-shifts a negative value.
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// Clamps v to an upper bound of 255.
// For v >= 255 the mask is all ones, so the result is 255; otherwise the
// result is the low 8 bits of v. Negative inputs are therefore NOT preserved
// (e.g. -5 yields 251), so a caller that needs a [0, 255] result must clamp
// the lower bound separately. Using a compare avoids computing 255 - v,
// which can overflow for very negative v.
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
// Clamps v to an upper bound of 1023.
// For v >= 1023 the mask is all ones, so the result is 1023; otherwise the
// result is the low 10 bits of v, which means negative inputs are not
// preserved (e.g. -1 yields 1023). Using a compare avoids computing
// 1023 - v, which can overflow for very negative v.
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}
// Returns the absolute value of v as an unsigned 32-bit value.
// m is all ones when v is negative and zero otherwise, so (v + m) ^ m is the
// two's-complement negation of v for negative v and v itself otherwise.
// The mask is declared once and the arithmetic is done in uint32_t, so
// INT32_MIN wraps to 0x80000000 instead of overflowing a signed int.
static __inline uint32_t Abs(int32_t v) {
  uint32_t m = (uint32_t)0 - (uint32_t)(v < 0);
  return ((uint32_t)v + m) ^ m;
}
#else // USE_BRANCHLESS

View File

@ -2611,13 +2611,13 @@ void BGRAToUVRow_MSA(const uint8_t* src_rgb0,
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
for (x = 0; x < width; x += 16) {
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
const_0x250013, const_0x008080, shuffler0, shuffler1,
shuffler2, shuffler3, dst0, dst1);
const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2,
shuffler3, dst0, dst1);
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
s += 64;
@ -2646,13 +2646,13 @@ void ABGRToUVRow_MSA(const uint8_t* src_rgb0,
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
for (x = 0; x < width; x += 16) {
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
const_0x250013, const_0x008080, shuffler0, shuffler1,
shuffler2, shuffler3, dst0, dst1);
const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2,
shuffler3, dst0, dst1);
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
s += 64;
@ -2681,13 +2681,13 @@ void RGBAToUVRow_MSA(const uint8_t* src_rgb0,
v8u16 const_0x000038 = (v8u16)__msa_fill_w(0x0038);
v8u16 const_0x250013 = (v8u16)__msa_fill_w(0x250013);
v4u32 const_0x008080 = (v4u32)__msa_fill_w(0x8080);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
v8u16 const_0x0001 = (v8u16)__msa_fill_h(0x0001);
for (x = 0; x < width; x += 16) {
READ_ARGB(s, t, src0, src1, src2, src3, const_0x0001);
ARGBTOUV_H(src0, src1, src2, src3, const_0x09002f, const_0x000038,
const_0x250013, const_0x008080, shuffler0, shuffler1,
shuffler2, shuffler3, dst0, dst1);
const_0x250013, const_0x008080, shuffler0, shuffler1, shuffler2,
shuffler3, dst0, dst1);
*((uint64_t*)dst_v) = __msa_copy_u_d((v2i64)dst0, 0);
*((uint64_t*)dst_u) = __msa_copy_u_d((v2i64)dst1, 0);
s += 64;

View File

@ -20,21 +20,19 @@
namespace libyuv {
// TODO(fbarchard): Port high accuracy YUV to RGB to Neon.
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define ERROR_R 1
#define ERROR_G 1
#define ERROR_B 3
#define ERROR_FULL 6
#define ERROR_J420 6
#else
#define ERROR_R 1
#define ERROR_G 1
#define ERROR_B 3
// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
// Port to Visual C and other CPUs
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#define ERROR_FULL 5
#define ERROR_J420 4
#else
#define ERROR_FULL 6
#define ERROR_J420 6
#endif
#define ERROR_R 1
#define ERROR_G 1
#define ERROR_B 3
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
TEST_F(LibYUVColorTest, TESTNAME) { \

View File

@ -720,18 +720,24 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Premult, +, 0, 1)
// Wrappers that give each colour-matrix variant of the I420Alpha conversions
// the same 13-argument shape. Every wrapper forwards its arguments unchanged
// to the corresponding *Matrix function and inserts the address of a constants
// table after the tenth argument (j). Each macro is defined exactly once.

// J420: uses kYuvJPEGConstants.
#define J420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)
#define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \
                        l, m)

// H420: uses kYuvH709Constants.
#define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)
#define H420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \
                        l, m)

// U420: uses kYuv2020Constants.
#define U420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
#define U420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m)               \
  I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuv2020Constants, k, \
                        l, m)
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1)
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1)

View File

@ -12,7 +12,7 @@
#include "libyuv/cpu_id.h"
#if defined(__clang__)
#if defined(__clang__) && !defined(__wasm__)
#if __has_include(<pthread.h>)
#define LIBYUV_HAVE_PTHREAD 1
#endif
@ -30,7 +30,7 @@ namespace libyuv {
// Stores the result of TestCpuFlag() into the int that arg points to and
// returns nullptr. The void*(void*) signature and the LIBYUV_HAVE_PTHREAD
// guard suggest this is a pthread start routine — confirm against the caller.
void* ThreadMain(void* arg) {
// arg must point to a writable int; it is written through without a null check.
int* flags = static_cast<int*>(arg);
// NOTE(review): this value is overwritten by the next statement, so only any
// side effect of the call survives — confirm whether it is still needed.
*flags = TestCpuFlag(kCpuHasSSSE3);
*flags = TestCpuFlag(kCpuInitialized);
return nullptr;
}
#endif // LIBYUV_HAVE_PTHREAD

View File

@ -305,8 +305,8 @@ TEST_SCALETO(ARGBScale, 1, 1)
TEST_SCALETO(ARGBScale, 320, 240)
TEST_SCALETO(ARGBScale, 569, 480)
TEST_SCALETO(ARGBScale, 640, 360)
TEST_SCALETO(ARGBScale, 1280, 720)
#ifdef ENABLE_SLOW_TESTS
TEST_SCALETO(ARGBScale, 1280, 720)
TEST_SCALETO(ARGBScale, 1920, 1080)
#endif // ENABLE_SLOW_TESTS
#undef TEST_SCALETO1