BIT_EXACT for unattenuate and attenuate.

- reenable Intel SIMD unaffected by BIT_EXACT
- add bit exact version of ARGBAttenuate, which uses ARM version of formula.
- add bit exact version of ARGBUnattenuate, which mimics the AVX code.

Apply clang-format to clean up code.

Bug: libyuv:908, b/202888439
Change-Id: Ie842b1b3956b48f4190858e61c02998caedc2897
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3224702
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
This commit is contained in:
Frank Barchard 2021-10-15 12:12:02 -07:00 committed by libyuv LUCI CQ
parent 11cbf8f976
commit 55b97cb48f
22 changed files with 179 additions and 107 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1798
Version: 1799
License: BSD
License File: LICENSE

View File

@ -18,7 +18,7 @@ namespace libyuv {
extern "C" {
#endif
#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86

View File

@ -23,7 +23,7 @@ extern "C" {
#endif
// TODO(fbarchard): Move cpu macros to row.h
#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86

View File

@ -18,7 +18,7 @@ namespace libyuv {
extern "C" {
#endif
#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86

View File

@ -20,7 +20,7 @@ namespace libyuv {
extern "C" {
#endif
#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86
@ -74,8 +74,10 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions:
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#endif
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBEXTRACTALPHAROW_SSE2
@ -87,13 +89,15 @@ extern "C" {
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565DITHERROW_SSE2
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTOYJROW_SSSE3
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBTOUV444ROW_SSSE3
#define HAS_ARGBTOUVJROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYJROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#endif
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_H422TOARGBROW_SSSE3
@ -119,14 +123,16 @@ extern "C" {
#define HAS_NV21TORGB24ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTORGB24ROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB24TOYJROW_SSSE3
#define HAS_RAWTOYJROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#endif
#define HAS_SETROW_ERMS
#define HAS_SETROW_X86
#define HAS_SPLITUVROW_SSE2
@ -142,7 +148,9 @@ extern "C" {
// Effects:
#define HAS_ARGBADDROW_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBATTENUATEROW_SSSE3
#endif
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBCOLORTABLEROW_X86
@ -192,12 +200,14 @@ extern "C" {
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_RAWTOYJROW_AVX2
#define HAS_RGB24TOYJROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBTOUVJROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_RGB24TOYJROW_AVX2
#define HAS_RAWTOYJROW_AVX2
#endif
#define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2
@ -230,7 +240,9 @@ extern "C" {
// Effects:
#define HAS_ARGBADDROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ARGBATTENUATEROW_AVX2
#endif
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
@ -297,7 +309,9 @@ extern "C" {
#define HAS_P410TOARGBROW_SSSE3
#define HAS_RAWTORGBAROW_SSSE3
#define HAS_RGB24MIRRORROW_SSSE3
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RGBATOYJROW_SSSE3
#endif
#define HAS_SPLITARGBROW_SSE2
#define HAS_SPLITARGBROW_SSSE3
#define HAS_SPLITXRGBROW_SSE2
@ -319,8 +333,10 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__)) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOAR30ROW_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_ABGRTOUVROW_AVX2
#define HAS_ABGRTOYROW_AVX2
#endif
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
@ -356,7 +372,9 @@ extern "C" {
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RGBATOYJROW_AVX2
#endif
#define HAS_SPLITARGBROW_AVX2
#define HAS_SPLITXRGBROW_AVX2
#define HAS_SPLITUVROW_16_AVX2

View File

@ -19,7 +19,7 @@ namespace libyuv {
extern "C" {
#endif
#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
#if defined(__pnacl__) || defined(__CLR_VER) || \
(defined(__native_client__) && defined(__x86_64__)) || \
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
#define LIBYUV_DISABLE_X86

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1798
#define LIBYUV_VERSION 1799
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -2004,16 +2004,22 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
@ -2347,16 +2353,22 @@ int RAWToJ420(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;

View File

@ -1830,16 +1830,22 @@ int ARGBToJ420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
@ -1939,16 +1945,22 @@ int ARGBToJ422(const uint8_t* src_argb,
height = 1;
src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
}
#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
ARGBToYJRow = ARGBToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToYJRow = ARGBToYJRow_Any_AVX2;

View File

@ -974,6 +974,8 @@ ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_SSE2
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif

View File

@ -28,14 +28,20 @@ extern "C" {
// The following macro from row_win makes the C code match the row_win code,
// which is 7 bit fixed point for ARGBToI420:
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
!defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
defined(_MSC_VER) && !defined(__clang__) && \
(defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#define LIBYUV_ATTENUATE_DUP 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
@ -3151,11 +3157,11 @@ void BlendPlaneRow_C(const uint8_t* src0,
}
#undef UBLEND
#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (f * a + 128) >> 8
#else
#if LIBYUV_ATTENUATE_DUP
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
#else
#define ATTENUATE(f, a) (f * a + 128) >> 8
#endif
// Multiply source RGB by alpha and store to destination.
@ -3242,6 +3248,14 @@ const uint32_t fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
#if LIBYUV_UNATTENUATE_DUP
// This code mimics the Intel SIMD version for better testability.
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
#else
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
#endif
// mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
@ -3252,13 +3266,11 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint32_t r = src_argb[2];
const uint32_t a = src_argb[3];
const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
b = (b * ia) >> 8;
g = (g * ia) >> 8;
r = (r * ia) >> 8;
// Clamping should not be necessary but is free in assembly.
dst_argb[0] = clamp255(b);
dst_argb[1] = clamp255(g);
dst_argb[2] = clamp255(r);
dst_argb[0] = UNATTENUATE(b, ia);
dst_argb[1] = UNATTENUATE(g, ia);
dst_argb[2] = UNATTENUATE(r, ia);
dst_argb[3] = a;
src_argb += 4;
dst_argb += 4;

View File

@ -1417,9 +1417,12 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
}
#endif // HAS_RGBATOYJROW_SSSE3
#ifdef HAS_ARGBTOYROW_AVX2
#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
// vpermd for vphaddw + vpackuswb vpermd.
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
#endif
#ifdef HAS_ARGBTOYROW_AVX2
// Convert 32 ARGB pixels (128 bytes) to 32 Y values.
void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {

View File

@ -1683,7 +1683,6 @@ void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
: "cc", "memory", "v0", "v1", "v16", "v17", "v18", "v19", "v23");
}
void ARGBToAR64Row_NEON(const uint8_t* src_argb,
uint16_t* dst_ar64,
int width) {

View File

@ -1829,7 +1829,8 @@ static void ScalePlaneSimple(int src_width,
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1870,7 +1871,8 @@ static void ScalePlaneSimple_16(int src_width,
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}

View File

@ -888,8 +888,8 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
@ -973,8 +973,8 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4, src_stride,
dst, dst_stride, clip_width, clip_height);
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
}

View File

@ -690,7 +690,8 @@ void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
@ -796,7 +797,8 @@ void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
@ -927,7 +929,8 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
}
@ -1061,8 +1064,8 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2, src_stride, dst,
dst_stride, clip_width, clip_height);
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
#endif
@ -1163,12 +1166,13 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv, src_stride_uv,
dst_uv, dst_stride_uv, dst_width, dst_height);
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv, dy * (int64_t)src_stride_uv,
dst_uv, dst_stride_uv, dst_width, dst_height);
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
}
return 0;

View File

@ -22,7 +22,8 @@ namespace libyuv {
// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
// Port to Visual C and other CPUs
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__))
#define ERROR_FULL 5
#define ERROR_J420 4
#else

View File

@ -4140,15 +4140,15 @@ TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
const int kSize = 256;
align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24
align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) / (kSize * 2) * benchmark_iterations_;
int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
(kSize * 2) * benchmark_iterations_;
for (int i = 0; i < kSize * 3 * 2; ++i) {
orig_rgb24[i] = i;
}
for (int i = 0; i < iterations256; ++i) {
RGB24ToJ420(orig_rgb24, kSize * 3,
dest_j420, kSize, // Y plane
RGB24ToJ420(orig_rgb24, kSize * 3, dest_j420, kSize, // Y plane
dest_j420 + kSize * 2, kSize / 2, // U plane
dest_j420 + kSize * 5 / 2, kSize / 2, // V plane
kSize, 2);

View File

@ -29,6 +29,12 @@
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#endif
#if defined(LIBYUV_BIT_EXACT)
#define EXPECTED_ATTENUATE_DIFF 0
#else
#define EXPECTED_ATTENUATE_DIFF 2
#endif
namespace libyuv {
TEST_F(LibYUVPlanarTest, TestAttenuate) {
@ -100,9 +106,9 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) {
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
EXPECT_NEAR(255, atten_pixels[255 * 4 + 0], 1);
EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], 1);
EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], 1);
EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@ -158,28 +164,29 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
int max_diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
static int TestUnattenuateI(int width,
@ -231,28 +238,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
EXPECT_LE(max_diff, 2);
EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {

View File

@ -524,8 +524,8 @@ TEST_F(LibYUVScaleTest, ARGBTest3x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
@ -561,8 +561,8 @@ TEST_F(LibYUVScaleTest, ARGBTest4x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);

View File

@ -1418,8 +1418,8 @@ TEST_F(LibYUVScaleTest, PlaneTest3x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
@ -1446,8 +1446,8 @@ TEST_F(LibYUVScaleTest, PlaneTest4x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);

View File

@ -224,8 +224,8 @@ TEST_F(LibYUVScaleTest, UVTest3x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);
@ -255,8 +255,8 @@ TEST_F(LibYUVScaleTest, UVTest4x) {
}
align_buffer_page_end(dest_pixels, kDstStride);
int iterations160 =
(benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 * benchmark_iterations_;
int iterations160 = (benchmark_width_ * benchmark_height_ + (160 - 1)) / 160 *
benchmark_iterations_;
for (int i = 0; i < iterations160; ++i) {
UVScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterBilinear);