mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
ARGBPolynomial for applying a 3 term polynomial matrix to pixels.
BUG=265 TEST=ARGBPolynomial R=thorcarpenter@google.com Review URL: https://webrtc-codereview.appspot.com/2159005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@778 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
2154de414c
commit
ae0091e3a7
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 777
|
Version: 778
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -210,6 +210,20 @@ int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
|
|||||||
const uint8* table_argb,
|
const uint8* table_argb,
|
||||||
int x, int y, int width, int height);
|
int x, int y, int width, int height);
|
||||||
|
|
||||||
|
// Apply a 3 term polynomial to ARGB values.
|
||||||
|
// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
|
||||||
|
// coefficients for b, g, r and a. The 3rd row is coefficients for b squared,
|
||||||
|
// g squared, r squared and a squared. The 4th row is coefficients for b to
|
||||||
|
// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and
|
||||||
|
// result clamped to 0 to 255.
|
||||||
|
// A polynomial approximation can be derived using software such as 'R'.
|
||||||
|
|
||||||
|
LIBYUV_API
|
||||||
|
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
|
||||||
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
|
const float* poly,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
// Quantize a rectangle of ARGB. Alpha unaffected.
|
// Quantize a rectangle of ARGB. Alpha unaffected.
|
||||||
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
|
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
|
||||||
// interval_size should be a value between 1 and 255.
|
// interval_size should be a value between 1 and 255.
|
||||||
|
|||||||
@ -140,8 +140,11 @@ extern "C" {
|
|||||||
// The following are Windows only:
|
// The following are Windows only:
|
||||||
// TODO(fbarchard): Port to gcc.
|
// TODO(fbarchard): Port to gcc.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||||
|
// Effects:
|
||||||
#define HAS_ARGBCOLORTABLEROW_X86
|
#define HAS_ARGBCOLORTABLEROW_X86
|
||||||
#define HAS_RGBCOLORTABLEROW_X86
|
#define HAS_RGBCOLORTABLEROW_X86
|
||||||
|
#define HAS_ARGBPOLYNOMIALROW_SSE2
|
||||||
|
|
||||||
// Caveat: Visual C 2012 required for AVX2.
|
// Caveat: Visual C 2012 required for AVX2.
|
||||||
#if _MSC_VER >= 1700
|
#if _MSC_VER >= 1700
|
||||||
#define HAS_ARGBSHUFFLEROW_AVX2
|
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||||
@ -320,6 +323,7 @@ typedef __declspec(align(32)) uint32 ulvec32[8];
|
|||||||
typedef __declspec(align(32)) uint8 ulvec8[32];
|
typedef __declspec(align(32)) uint8 ulvec8[32];
|
||||||
|
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
|
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
|
||||||
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
|
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
|
||||||
typedef int16 __attribute__((vector_size(16))) vec16;
|
typedef int16 __attribute__((vector_size(16))) vec16;
|
||||||
typedef int32 __attribute__((vector_size(16))) vec32;
|
typedef int32 __attribute__((vector_size(16))) vec32;
|
||||||
@ -1542,6 +1546,14 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
|
|||||||
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||||
uint8* dst_argb, int width);
|
uint8* dst_argb, int width);
|
||||||
|
|
||||||
|
void ARGBPolynomialRow_C(const uint8* src_argb,
|
||||||
|
uint8* dst_argb, const float* poly,
|
||||||
|
int width);
|
||||||
|
|
||||||
|
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
|
||||||
|
uint8* dst_argb, const float* poly,
|
||||||
|
int width);
|
||||||
|
|
||||||
// Divide num by div and return as 16.16 fixed point result.
|
// Divide num by div and return as 16.16 fixed point result.
|
||||||
int FixedDiv_C(int num, int div);
|
int FixedDiv_C(int num, int div);
|
||||||
int FixedDiv_X86(int num, int div);
|
int FixedDiv_X86(int num, int div);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 777
|
#define LIBYUV_VERSION 778
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -2032,6 +2032,38 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply a 4x4 polynomial to each ARGB pixel.
|
||||||
|
LIBYUV_API
|
||||||
|
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
|
||||||
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
|
const float* poly,
|
||||||
|
int width, int height) {
|
||||||
|
if (!src_argb || !dst_argb || !poly || width <= 0 || height <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Coalesce contiguous rows.
|
||||||
|
if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
|
||||||
|
return ARGBPolynomial(src_argb, 0,
|
||||||
|
dst_argb, 0,
|
||||||
|
poly,
|
||||||
|
width * height, 1);
|
||||||
|
}
|
||||||
|
void (*ARGBPolynomialRow)(const uint8* src_argb,
|
||||||
|
uint8* dst_argb, const float* poly,
|
||||||
|
int width) = ARGBPolynomialRow_C;
|
||||||
|
#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
|
||||||
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
|
ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (int y = 0; y < height; ++y) {
|
||||||
|
ARGBPolynomialRow(src_argb, dst_argb, poly, width);
|
||||||
|
src_argb += src_stride_argb;
|
||||||
|
dst_argb += dst_stride_argb;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -2009,8 +2009,48 @@ void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
|
|||||||
UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
|
UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
|
||||||
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
|
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
|
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
|
||||||
#endif // !defined(LIBYUV_DISABLE_X86)
|
#endif // !defined(LIBYUV_DISABLE_X86)
|
||||||
|
|
||||||
|
void ARGBPolynomialRow_C(const uint8* src_argb,
|
||||||
|
uint8* dst_argb, const float* poly,
|
||||||
|
int width) {
|
||||||
|
for (int i = 0; i < width; ++i) {
|
||||||
|
float b = static_cast<float>(src_argb[0]);
|
||||||
|
float g = static_cast<float>(src_argb[1]);
|
||||||
|
float r = static_cast<float>(src_argb[2]);
|
||||||
|
float a = static_cast<float>(src_argb[3]);
|
||||||
|
float b2 = b * b;
|
||||||
|
float g2 = g * g;
|
||||||
|
float r2 = r * r;
|
||||||
|
float a2 = a * a;
|
||||||
|
float db = poly[0] + poly[4] * b;
|
||||||
|
float dg = poly[1] + poly[5] * g;
|
||||||
|
float dr = poly[2] + poly[6] * r;
|
||||||
|
float da = poly[3] + poly[7] * a;
|
||||||
|
db += poly[8] * b2;
|
||||||
|
dg += poly[9] * g2;
|
||||||
|
dr += poly[10] * r2;
|
||||||
|
da += poly[11] * a2;
|
||||||
|
float b3 = b2 * b;
|
||||||
|
float g3 = g2 * g;
|
||||||
|
float r3 = r2 * r;
|
||||||
|
float a3 = a2 * a;
|
||||||
|
db += poly[12] * b3;
|
||||||
|
dg += poly[13] * g3;
|
||||||
|
dr += poly[14] * r3;
|
||||||
|
da += poly[15] * a3;
|
||||||
|
|
||||||
|
dst_argb[0]= Clamp(static_cast<int32>(db));
|
||||||
|
dst_argb[1]= Clamp(static_cast<int32>(dg));
|
||||||
|
dst_argb[2]= Clamp(static_cast<int32>(dr));
|
||||||
|
dst_argb[3]= Clamp(static_cast<int32>(da));
|
||||||
|
src_argb += 4;
|
||||||
|
dst_argb += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#undef clamp0
|
#undef clamp0
|
||||||
#undef clamp255
|
#undef clamp255
|
||||||
|
|
||||||
|
|||||||
@ -6767,6 +6767,53 @@ int FixedDiv_X86(int num, int div) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // HAS_FIXEDDIV_X86
|
#endif // HAS_FIXEDDIV_X86
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
// SSE2 row function: evaluates C0 + C1 * X + C2 * X * X + C3 * X * X * X for
// the 4 bytes of one pixel per loop iteration, using 4 float lanes.
// poly holds 16 floats (rows C0, C1, C2, C3) and is loaded with unaligned
// loads. The loop counter is tested after the first pixel has been processed,
// so width must be at least 1.
// Results are truncated to int32 and then narrowed with saturation at every
// step (int32 -> int16 signed, int16 -> uint8 unsigned) so that any value
// outside 0..255 clamps to 0 or 255. Packing the int32 lanes directly with
// packuswb would treat each half of a lane as a separate signed word and
// mis-clamp values beyond the int16 range (e.g. 40000 would become 0).
__declspec(naked) __declspec(align(16))
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
                            uint8* dst_argb, const float* poly,
                            int width) {
  __asm {
    mov        eax, [esp + 12]  /* poly */
    movdqu     xmm4, [eax]       // C0
    movdqu     xmm5, [eax + 16]  // C1
    movdqu     xmm6, [eax + 32]  // C2
    movdqu     xmm7, [eax + 48]  // C3

    mov        eax, [esp + 4]  /* src_argb */
    mov        edx, [esp + 8]  /* dst_argb */
    mov        ecx, [esp + 16]  /* width */
    pxor       xmm3, xmm3  // zero, used to widen 4 bytes to 4 ints

    align      16
 convertloop:
    movd       xmm0, [eax]  // BGRA
    lea        eax, [eax + 4]
    punpcklbw  xmm0, xmm3  // bytes to words
    punpcklwd  xmm0, xmm3  // words to dwords
    cvtdq2ps   xmm0, xmm0  // 4 floats
    movdqa     xmm1, xmm0  // X
    mulps      xmm0, xmm5  // C1 * X
    addps      xmm0, xmm4  // result = C0 + C1 * X
    movdqa     xmm2, xmm1
    mulps      xmm2, xmm1  // X * X
    mulps      xmm1, xmm2  // X * X * X
    mulps      xmm2, xmm6  // C2 * X * X
    mulps      xmm1, xmm7  // C3 * X * X * X
    addps      xmm0, xmm2  // result += C2 * X * X
    addps      xmm0, xmm1  // result += C3 * X * X * X
    cvttps2dq  xmm0, xmm0  // truncate 4 floats to 4 ints
    packssdw   xmm0, xmm0  // ints to words, signed saturation
    packuswb   xmm0, xmm0  // words to bytes, unsigned saturation
    sub        ecx, 1
    movd       [edx], xmm0  // movd and lea leave the flags from sub intact
    lea        edx, [edx + 4]
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBPOLYNOMIALROW_SSE2
|
||||||
|
|
||||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@ -1656,4 +1656,68 @@ TEST_F(libyuvTest, ARGBBlur_Opt) {
|
|||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks ARGBPolynomial against known outputs for 4 hand-picked pixels, then
// runs it repeatedly over 1280 pixels as a benchmark (no result checks).
TEST_F(libyuvTest, TestARGBPolynomial) {
  SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8 dst_pixels[1280][4]);

  // Zero both buffers. The first call below converts 16 pixels but only the
  // first 4 source pixels are assigned explicitly; without this the remaining
  // 12 would be read uninitialized.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels[i][j] = 0u;
      dst_pixels[i][j] = 0u;
    }
  }

  // One column per channel (b, g, r, a); one row per polynomial term.
  static const float kWarmifyPolynomial[16] = {
    0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
    0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
    0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
    0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Test blue
  orig_pixels[0][0] = 255u;
  orig_pixels[0][1] = 0u;
  orig_pixels[0][2] = 0u;
  orig_pixels[0][3] = 128u;
  // Test green
  orig_pixels[1][0] = 0u;
  orig_pixels[1][1] = 255u;
  orig_pixels[1][2] = 0u;
  orig_pixels[1][3] = 0u;
  // Test red
  orig_pixels[2][0] = 0u;
  orig_pixels[2][1] = 0u;
  orig_pixels[2][2] = 255u;
  orig_pixels[2][3] = 255u;
  // Test color
  orig_pixels[3][0] = 16u;
  orig_pixels[3][1] = 64u;
  orig_pixels[3][2] = 192u;
  orig_pixels[3][3] = 224u;
  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  EXPECT_EQ(235u, dst_pixels[0][0]);
  EXPECT_EQ(0u, dst_pixels[0][1]);
  EXPECT_EQ(0u, dst_pixels[0][2]);
  EXPECT_EQ(128u, dst_pixels[0][3]);
  EXPECT_EQ(0u, dst_pixels[1][0]);
  EXPECT_EQ(233u, dst_pixels[1][1]);
  EXPECT_EQ(0u, dst_pixels[1][2]);
  EXPECT_EQ(0u, dst_pixels[1][3]);
  EXPECT_EQ(0u, dst_pixels[2][0]);
  EXPECT_EQ(0u, dst_pixels[2][1]);
  EXPECT_EQ(241u, dst_pixels[2][2]);
  EXPECT_EQ(255u, dst_pixels[2][3]);
  EXPECT_EQ(10u, dst_pixels[3][0]);
  EXPECT_EQ(59u, dst_pixels[3][1]);
  EXPECT_EQ(188u, dst_pixels[3][2]);
  EXPECT_EQ(224u, dst_pixels[3][3]);

  // Fill every source pixel with a ramp (values wrap when stored as uint8),
  // then time repeated conversions of the full 1280 pixel row.
  for (int i = 0; i < 1280; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i;
  }
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0,
                   &kWarmifyPolynomial[0], 1280, 1);
  }
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user