mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Change ARGBColorMatrix to a 4x4.
BUG=none TEST=planar_unitest updates R=johannkoenig@google.com, ryanpetrie@google.com, thorcarpenter@google.com Review URL: https://webrtc-codereview.appspot.com/2320008 git-svn-id: http://libyuv.googlecode.com/svn/trunk@805 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
b99bcab7f7
commit
c99db063e2
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 804
|
Version: 805
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -186,16 +186,33 @@ LIBYUV_API
|
|||||||
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
|
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
|
||||||
int x, int y, int width, int height);
|
int x, int y, int width, int height);
|
||||||
|
|
||||||
|
// Deprecated. Use ARGBColorMatrix instead.
|
||||||
// Apply a matrix rotation to each ARGB pixel.
|
// Apply a matrix rotation to each ARGB pixel.
|
||||||
// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
|
// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
|
||||||
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
|
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
|
||||||
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
|
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
|
||||||
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
|
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
||||||
const int8* matrix_argb,
|
const int8* matrix_rgb,
|
||||||
int x, int y, int width, int height);
|
int x, int y, int width, int height);
|
||||||
|
|
||||||
|
// Temporary API mapper.
|
||||||
|
// #define ARGBColorMatrix(d, s, m, x, y, w, h) \
|
||||||
|
// RGBColorMatrix(d, s, m, x, y, w, h)
|
||||||
|
|
||||||
|
// Apply a matrix rotation to each ARGB pixel.
|
||||||
|
// matrix_argb is 4 rows of 4 signed coefficients (16 int8 values). -128 to 127 represents -2 to 2 (64 is 1.0).
|
||||||
|
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
|
||||||
|
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
|
||||||
|
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
|
||||||
|
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
|
||||||
|
LIBYUV_API
|
||||||
|
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
|
||||||
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
|
const int8* matrix_argb,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
// Apply a color table to each ARGB pixel.
|
// Apply a color table to each ARGB pixel.
|
||||||
// Table contains 256 ARGB values.
|
// Table contains 256 ARGB values.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -1456,11 +1456,12 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width);
|
|||||||
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
|
void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
|
||||||
void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
|
void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
|
||||||
|
|
||||||
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width);
|
void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
|
||||||
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
const int8* matrix_argb, int width);
|
||||||
int width);
|
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||||
void ARGBColorMatrixRow_NEON(uint8* dst_argb, const int8* matrix_argb,
|
const int8* matrix_argb, int width);
|
||||||
int width);
|
void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
const int8* matrix_argb, int width);
|
||||||
|
|
||||||
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
|
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
|
||||||
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
|
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 804
|
#define LIBYUV_VERSION 805
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -1345,24 +1345,31 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply a 4x3 matrix rotation to each ARGB pixel.
|
// Apply a 4x4 matrix to each ARGB pixel.
|
||||||
|
// Note: Normally for shading, but can be used to swizzle or invert.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
|
||||||
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
const int8* matrix_argb,
|
const int8* matrix_argb,
|
||||||
int dst_x, int dst_y, int width, int height) {
|
int width, int height) {
|
||||||
if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
|
if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
|
||||||
dst_x < 0 || dst_y < 0) {
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
if (height < 0) {
|
||||||
|
height = -height;
|
||||||
|
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||||
|
src_stride_argb = -src_stride_argb;
|
||||||
|
}
|
||||||
// Coalesce contiguous rows.
|
// Coalesce contiguous rows.
|
||||||
if (dst_stride_argb == width * 4) {
|
if (src_stride_argb == width * 4 &&
|
||||||
return ARGBColorMatrix(dst_argb, dst_stride_argb,
|
dst_stride_argb == width * 4) {
|
||||||
|
return ARGBColorMatrix(src_argb, 0,
|
||||||
|
dst_argb, 0,
|
||||||
matrix_argb,
|
matrix_argb,
|
||||||
dst_x, dst_y,
|
|
||||||
width * height, 1);
|
width * height, 1);
|
||||||
}
|
}
|
||||||
void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
|
void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
|
||||||
int width) = ARGBColorMatrixRow_C;
|
const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
|
||||||
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
|
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
|
||||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||||
@ -1373,14 +1380,48 @@ int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
|||||||
ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
|
ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
ARGBColorMatrixRow(dst, matrix_argb, width);
|
ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
|
||||||
dst += dst_stride_argb;
|
src_argb += src_stride_argb;
|
||||||
|
dst_argb += dst_stride_argb;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply a 4x3 matrix to each ARGB pixel.
|
||||||
|
// Deprecated.
|
||||||
|
LIBYUV_API
|
||||||
|
int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
||||||
|
const int8* matrix_rgb,
|
||||||
|
int dst_x, int dst_y, int width, int height) {
|
||||||
|
if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
|
||||||
|
dst_x < 0 || dst_y < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
|
||||||
|
SIMD_ALIGNED(int8 matrix_argb[16]);
|
||||||
|
matrix_argb[0] = matrix_rgb[0] / 2;
|
||||||
|
matrix_argb[1] = matrix_rgb[1] / 2;
|
||||||
|
matrix_argb[2] = matrix_rgb[2] / 2;
|
||||||
|
matrix_argb[3] = matrix_rgb[3] / 2;
|
||||||
|
matrix_argb[4] = matrix_rgb[4] / 2;
|
||||||
|
matrix_argb[5] = matrix_rgb[5] / 2;
|
||||||
|
matrix_argb[6] = matrix_rgb[6] / 2;
|
||||||
|
matrix_argb[7] = matrix_rgb[7] / 2;
|
||||||
|
matrix_argb[8] = matrix_rgb[8] / 2;
|
||||||
|
matrix_argb[9] = matrix_rgb[9] / 2;
|
||||||
|
matrix_argb[10] = matrix_rgb[10] / 2;
|
||||||
|
matrix_argb[11] = matrix_rgb[11] / 2;
|
||||||
|
matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
|
||||||
|
matrix_argb[15] = 64; // 1.0
|
||||||
|
|
||||||
|
uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
|
||||||
|
return ARGBColorMatrix(const_cast<const uint8*>(dst), dst_stride_argb,
|
||||||
|
dst, dst_stride_argb,
|
||||||
|
&matrix_argb[0], width, height);
|
||||||
|
}
|
||||||
|
|
||||||
// Apply a color table to each ARGB pixel.
|
// Apply a color table to each ARGB pixel.
|
||||||
// Table contains 256 ARGB values.
|
// Table contains 256 ARGB values.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -654,21 +654,27 @@ void ARGBSepiaRow_C(uint8* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply color matrix to a row of image. Matrix is signed.
|
// Apply color matrix to a row of image. Matrix is signed.
|
||||||
void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width) {
|
// TODO(fbarchard): Consider adding rounding (+32).
|
||||||
|
void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
const int8* matrix_argb, int width) {
|
||||||
for (int x = 0; x < width; ++x) {
|
for (int x = 0; x < width; ++x) {
|
||||||
int b = dst_argb[0];
|
int b = src_argb[0];
|
||||||
int g = dst_argb[1];
|
int g = src_argb[1];
|
||||||
int r = dst_argb[2];
|
int r = src_argb[2];
|
||||||
int a = dst_argb[3];
|
int a = src_argb[3];
|
||||||
int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
|
int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
|
||||||
r * matrix_argb[2] + a * matrix_argb[3]) >> 7;
|
r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
|
||||||
int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
|
int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
|
||||||
r * matrix_argb[6] + a * matrix_argb[7]) >> 7;
|
r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
|
||||||
int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
|
int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
|
||||||
r * matrix_argb[10] + a * matrix_argb[11]) >> 7;
|
r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
|
||||||
|
int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
|
||||||
|
r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
|
||||||
dst_argb[0] = Clamp(sb);
|
dst_argb[0] = Clamp(sb);
|
||||||
dst_argb[1] = Clamp(sg);
|
dst_argb[1] = Clamp(sg);
|
||||||
dst_argb[2] = Clamp(sr);
|
dst_argb[2] = Clamp(sr);
|
||||||
|
dst_argb[3] = Clamp(sa);
|
||||||
|
src_argb += 4;
|
||||||
dst_argb += 4;
|
dst_argb += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2474,18 +2474,19 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Transform 8 ARGB pixels (32 bytes) with color matrix.
|
// Transform 8 ARGB pixels (32 bytes) with color matrix.
|
||||||
// Same as Sepia except matrix is provided.
|
// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
|
||||||
void ARGBColorMatrixRow_NEON(uint8* dst_argb, const int8* matrix_argb,
|
// needs to saturate. Consider doing a non-saturating version.
|
||||||
int width) {
|
void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
const int8* matrix_argb, int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"vld1.8 {q2}, [%2] \n" // load 3 ARGB vectors.
|
"vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
|
||||||
"vmovl.s8 q0, d4 \n" // B,G coefficients s16.
|
"vmovl.s8 q0, d4 \n" // B,G coefficients s16.
|
||||||
"vmovl.s8 q1, d5 \n" // R coefficients s16.
|
"vmovl.s8 q1, d5 \n" // R,A coefficients s16.
|
||||||
|
|
||||||
".p2align 2 \n"
|
".p2align 2 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d16, d18, d20, d22}, [%0] \n" // load 8 ARGB pixels.
|
"vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels.
|
||||||
"subs %1, %1, #8 \n" // 8 processed per loop.
|
"subs %2, %2, #8 \n" // 8 processed per loop.
|
||||||
"vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
|
"vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit
|
||||||
"vmovl.u8 q9, d18 \n" // g
|
"vmovl.u8 q9, d18 \n" // g
|
||||||
"vmovl.u8 q10, d20 \n" // r
|
"vmovl.u8 q10, d20 \n" // r
|
||||||
@ -2493,33 +2494,42 @@ void ARGBColorMatrixRow_NEON(uint8* dst_argb, const int8* matrix_argb,
|
|||||||
"vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
|
"vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B
|
||||||
"vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
|
"vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G
|
||||||
"vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
|
"vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R
|
||||||
|
"vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A
|
||||||
"vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B
|
"vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B
|
||||||
"vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G
|
"vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G
|
||||||
"vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R
|
"vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R
|
||||||
|
"vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A
|
||||||
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
||||||
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
||||||
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
||||||
|
"vqadd.s16 q15, q15, q7 \n" // Accumulate A
|
||||||
"vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B
|
"vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B
|
||||||
"vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G
|
"vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G
|
||||||
"vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R
|
"vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R
|
||||||
|
"vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A
|
||||||
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
||||||
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
||||||
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
||||||
|
"vqadd.s16 q15, q15, q7 \n" // Accumulate A
|
||||||
"vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B
|
"vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B
|
||||||
"vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G
|
"vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G
|
||||||
"vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R
|
"vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R
|
||||||
|
"vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A
|
||||||
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
"vqadd.s16 q12, q12, q4 \n" // Accumulate B
|
||||||
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
"vqadd.s16 q13, q13, q5 \n" // Accumulate G
|
||||||
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
"vqadd.s16 q14, q14, q6 \n" // Accumulate R
|
||||||
"vqshrun.s16 d16, q12, #7 \n" // 16 bit to 8 bit B
|
"vqadd.s16 q15, q15, q7 \n" // Accumulate A
|
||||||
"vqshrun.s16 d18, q13, #7 \n" // 16 bit to 8 bit G
|
"vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B
|
||||||
"vqshrun.s16 d20, q14, #7 \n" // 16 bit to 8 bit R
|
"vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G
|
||||||
"vst4.8 {d16, d18, d20, d22}, [%0]! \n" // store 8 ARGB pixels.
|
"vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R
|
||||||
|
"vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A
|
||||||
|
"vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(width) // %1
|
"+r"(dst_argb), // %1
|
||||||
: "r"(matrix_argb) // %2
|
"+r"(width) // %2
|
||||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q8", "q9",
|
: "r"(matrix_argb) // %3
|
||||||
|
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
|
||||||
"q10", "q11", "q12", "q13", "q14", "q15"
|
"q10", "q11", "q12", "q13", "q14", "q15"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4327,62 +4327,67 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
|
|||||||
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
|
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
|
||||||
// Transform 8 ARGB pixels (32 bytes) with color matrix.
|
// Transform 8 ARGB pixels (32 bytes) with color matrix.
|
||||||
// Same as Sepia except matrix is provided.
|
// Same as Sepia except matrix is provided.
|
||||||
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||||
int width) {
|
const int8* matrix_argb, int width) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movd "MEMACCESS(2)",%%xmm2 \n"
|
"movd "MEMACCESS(3)",%%xmm2 \n"
|
||||||
"movd "MEMACCESS2(0x4,2)",%%xmm3 \n"
|
"movd "MEMACCESS2(0x4,3)",%%xmm3 \n"
|
||||||
"movd "MEMACCESS2(0x8,2)",%%xmm4 \n"
|
"movd "MEMACCESS2(0x8,3)",%%xmm4 \n"
|
||||||
|
"movd "MEMACCESS2(0xc,3)",%%xmm5 \n"
|
||||||
"pshufd $0x0,%%xmm2,%%xmm2 \n"
|
"pshufd $0x0,%%xmm2,%%xmm2 \n"
|
||||||
"pshufd $0x0,%%xmm3,%%xmm3 \n"
|
"pshufd $0x0,%%xmm3,%%xmm3 \n"
|
||||||
"pshufd $0x0,%%xmm4,%%xmm4 \n"
|
"pshufd $0x0,%%xmm4,%%xmm4 \n"
|
||||||
|
"pshufd $0x0,%%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
// 8 pixel loop.
|
// 8 pixel loop.
|
||||||
".p2align 4 \n"
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
"movdqa "MEMACCESS(0)",%%xmm0 \n"
|
||||||
"movdqa "MEMACCESS2(0x10,0)",%%xmm6 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm7 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm0 \n"
|
"pmaddubsw %%xmm2,%%xmm0 \n"
|
||||||
"pmaddubsw %%xmm2,%%xmm6 \n"
|
"pmaddubsw %%xmm2,%%xmm7 \n"
|
||||||
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
|
||||||
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
|
||||||
"pmaddubsw %%xmm3,%%xmm5 \n"
|
|
||||||
"pmaddubsw %%xmm3,%%xmm1 \n"
|
|
||||||
"phaddsw %%xmm6,%%xmm0 \n"
|
|
||||||
"phaddsw %%xmm1,%%xmm5 \n"
|
|
||||||
"psraw $0x7,%%xmm0 \n"
|
|
||||||
"psraw $0x7,%%xmm5 \n"
|
|
||||||
"packuswb %%xmm0,%%xmm0 \n"
|
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
|
||||||
"punpcklbw %%xmm5,%%xmm0 \n"
|
|
||||||
"movdqa "MEMACCESS(0)",%%xmm5 \n"
|
|
||||||
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
|
||||||
"pmaddubsw %%xmm4,%%xmm5 \n"
|
|
||||||
"pmaddubsw %%xmm4,%%xmm1 \n"
|
|
||||||
"phaddsw %%xmm1,%%xmm5 \n"
|
|
||||||
"psraw $0x7,%%xmm5 \n"
|
|
||||||
"packuswb %%xmm5,%%xmm5 \n"
|
|
||||||
"movdqa "MEMACCESS(0)",%%xmm6 \n"
|
"movdqa "MEMACCESS(0)",%%xmm6 \n"
|
||||||
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm1 \n"
|
||||||
"psrld $0x18,%%xmm6 \n"
|
"pmaddubsw %%xmm3,%%xmm6 \n"
|
||||||
"psrld $0x18,%%xmm1 \n"
|
"pmaddubsw %%xmm3,%%xmm1 \n"
|
||||||
"packuswb %%xmm1,%%xmm6 \n"
|
"phaddsw %%xmm7,%%xmm0 \n"
|
||||||
|
"phaddsw %%xmm1,%%xmm6 \n"
|
||||||
|
"psraw $0x6,%%xmm0 \n"
|
||||||
|
"psraw $0x6,%%xmm6 \n"
|
||||||
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"packuswb %%xmm6,%%xmm6 \n"
|
"packuswb %%xmm6,%%xmm6 \n"
|
||||||
"movdqa %%xmm0,%%xmm1 \n"
|
"punpcklbw %%xmm6,%%xmm0 \n"
|
||||||
"punpcklbw %%xmm6,%%xmm5 \n"
|
"movdqa "MEMACCESS(0)",%%xmm1 \n"
|
||||||
"punpcklwd %%xmm5,%%xmm0 \n"
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm7 \n"
|
||||||
"punpckhwd %%xmm5,%%xmm1 \n"
|
"pmaddubsw %%xmm4,%%xmm1 \n"
|
||||||
"sub $0x8,%1 \n"
|
"pmaddubsw %%xmm4,%%xmm7 \n"
|
||||||
"movdqa %%xmm0,"MEMACCESS(0)" \n"
|
"phaddsw %%xmm7,%%xmm1 \n"
|
||||||
"movdqa %%xmm1,"MEMACCESS2(0x10,0)" \n"
|
"movdqa "MEMACCESS(0)",%%xmm6 \n"
|
||||||
|
"movdqa "MEMACCESS2(0x10,0)",%%xmm7 \n"
|
||||||
|
"pmaddubsw %%xmm5,%%xmm6 \n"
|
||||||
|
"pmaddubsw %%xmm5,%%xmm7 \n"
|
||||||
|
"phaddsw %%xmm7,%%xmm6 \n"
|
||||||
|
"psraw $0x6,%%xmm1 \n"
|
||||||
|
"psraw $0x6,%%xmm6 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm6,%%xmm6 \n"
|
||||||
|
"punpcklbw %%xmm6,%%xmm1 \n"
|
||||||
|
"movdqa %%xmm0,%%xmm6 \n"
|
||||||
|
"punpcklwd %%xmm1,%%xmm0 \n"
|
||||||
|
"punpckhwd %%xmm1,%%xmm6 \n"
|
||||||
|
"sub $0x8,%2 \n"
|
||||||
|
"movdqa %%xmm0,"MEMACCESS(1)" \n"
|
||||||
|
"movdqa %%xmm6,"MEMACCESS2(0x10,1)" \n"
|
||||||
"lea "MEMLEA(0x20,0)",%0 \n"
|
"lea "MEMLEA(0x20,0)",%0 \n"
|
||||||
|
"lea "MEMLEA(0x20,1)",%1 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(width) // %1
|
"+r"(dst_argb), // %1
|
||||||
: "r"(matrix_argb) // %2
|
"+r"(width) // %2
|
||||||
|
: "r"(matrix_argb) // %3
|
||||||
: "memory", "cc"
|
: "memory", "cc"
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5002,57 +5002,62 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
|
|||||||
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
|
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
|
||||||
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
|
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
|
void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||||
int width) {
|
const int8* matrix_argb, int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] /* dst_argb */
|
mov eax, [esp + 4] /* src_argb */
|
||||||
mov edx, [esp + 8] /* matrix_argb */
|
mov edx, [esp + 8] /* dst_argb */
|
||||||
mov ecx, [esp + 12] /* width */
|
mov ecx, [esp + 12] /* matrix_argb */
|
||||||
movd xmm2, [edx]
|
movd xmm2, [ecx]
|
||||||
movd xmm3, [edx + 4]
|
movd xmm3, [ecx + 4]
|
||||||
movd xmm4, [edx + 8]
|
movd xmm4, [ecx + 8]
|
||||||
|
movd xmm5, [ecx + 12]
|
||||||
pshufd xmm2, xmm2, 0
|
pshufd xmm2, xmm2, 0
|
||||||
pshufd xmm3, xmm3, 0
|
pshufd xmm3, xmm3, 0
|
||||||
pshufd xmm4, xmm4, 0
|
pshufd xmm4, xmm4, 0
|
||||||
|
pshufd xmm5, xmm5, 0
|
||||||
|
mov ecx, [esp + 16] /* width */
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
convertloop:
|
convertloop:
|
||||||
movdqa xmm0, [eax] // B
|
movdqa xmm0, [eax] // B
|
||||||
movdqa xmm6, [eax + 16]
|
movdqa xmm7, [eax + 16]
|
||||||
pmaddubsw xmm0, xmm2
|
pmaddubsw xmm0, xmm2
|
||||||
pmaddubsw xmm6, xmm2
|
pmaddubsw xmm7, xmm2
|
||||||
movdqa xmm5, [eax] // G
|
movdqa xmm6, [eax] // G
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm1, [eax + 16]
|
||||||
pmaddubsw xmm5, xmm3
|
pmaddubsw xmm6, xmm3
|
||||||
pmaddubsw xmm1, xmm3
|
pmaddubsw xmm1, xmm3
|
||||||
phaddsw xmm0, xmm6 // B
|
phaddsw xmm0, xmm7 // B
|
||||||
phaddsw xmm5, xmm1 // G
|
phaddsw xmm6, xmm1 // G
|
||||||
psraw xmm0, 7 // B
|
psraw xmm0, 6 // B
|
||||||
psraw xmm5, 7 // G
|
psraw xmm6, 6 // G
|
||||||
packuswb xmm0, xmm0 // 8 B values
|
packuswb xmm0, xmm0 // 8 B values
|
||||||
packuswb xmm5, xmm5 // 8 G values
|
packuswb xmm6, xmm6 // 8 G values
|
||||||
punpcklbw xmm0, xmm5 // 8 BG values
|
punpcklbw xmm0, xmm6 // 8 BG values
|
||||||
movdqa xmm5, [eax] // R
|
movdqa xmm1, [eax] // R
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm7, [eax + 16]
|
||||||
pmaddubsw xmm5, xmm4
|
|
||||||
pmaddubsw xmm1, xmm4
|
pmaddubsw xmm1, xmm4
|
||||||
phaddsw xmm5, xmm1
|
pmaddubsw xmm7, xmm4
|
||||||
psraw xmm5, 7
|
phaddsw xmm1, xmm7 // R
|
||||||
packuswb xmm5, xmm5 // 8 R values
|
|
||||||
movdqa xmm6, [eax] // A
|
movdqa xmm6, [eax] // A
|
||||||
movdqa xmm1, [eax + 16]
|
movdqa xmm7, [eax + 16]
|
||||||
psrld xmm6, 24
|
pmaddubsw xmm6, xmm5
|
||||||
psrld xmm1, 24
|
pmaddubsw xmm7, xmm5
|
||||||
packuswb xmm6, xmm1
|
phaddsw xmm6, xmm7 // A
|
||||||
|
psraw xmm1, 6 // R
|
||||||
|
psraw xmm6, 6 // A
|
||||||
|
packuswb xmm1, xmm1 // 8 R values
|
||||||
packuswb xmm6, xmm6 // 8 A values
|
packuswb xmm6, xmm6 // 8 A values
|
||||||
movdqa xmm1, xmm0 // Weave BG, RA together
|
punpcklbw xmm1, xmm6 // 8 RA values
|
||||||
punpcklbw xmm5, xmm6 // 8 RA values
|
movdqa xmm6, xmm0 // Weave BG, RA together
|
||||||
punpcklwd xmm0, xmm5 // BGRA first 4
|
punpcklwd xmm0, xmm1 // BGRA first 4
|
||||||
punpckhwd xmm1, xmm5 // BGRA next 4
|
punpckhwd xmm6, xmm1 // BGRA next 4
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
movdqa [eax], xmm0
|
movdqa [edx], xmm0
|
||||||
movdqa [eax + 16], xmm1
|
movdqa [edx + 16], xmm6
|
||||||
lea eax, [eax + 32]
|
lea eax, [eax + 32]
|
||||||
|
lea edx, [edx + 32]
|
||||||
jg convertloop
|
jg convertloop
|
||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
|
|||||||
@ -484,9 +484,87 @@ TEST_F(libyuvTest, TestARGBSepia) {
|
|||||||
|
|
||||||
TEST_F(libyuvTest, TestARGBColorMatrix) {
|
TEST_F(libyuvTest, TestARGBColorMatrix) {
|
||||||
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
||||||
|
SIMD_ALIGNED(uint8 dst_pixels_opt[1280][4]);
|
||||||
|
SIMD_ALIGNED(uint8 dst_pixels_c[1280][4]);
|
||||||
|
|
||||||
// Matrix for Sepia.
|
// Matrix for Sepia.
|
||||||
static const int8 kARGBToSepia[] = {
|
SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
|
||||||
|
17 / 2, 68 / 2, 35 / 2, 0,
|
||||||
|
22 / 2, 88 / 2, 45 / 2, 0,
|
||||||
|
24 / 2, 98 / 2, 50 / 2, 0,
|
||||||
|
0, 0, 0, 64, // Copy alpha.
|
||||||
|
};
|
||||||
|
memset(orig_pixels, 0, sizeof(orig_pixels));
|
||||||
|
|
||||||
|
// Test blue
|
||||||
|
orig_pixels[0][0] = 255u;
|
||||||
|
orig_pixels[0][1] = 0u;
|
||||||
|
orig_pixels[0][2] = 0u;
|
||||||
|
orig_pixels[0][3] = 128u;
|
||||||
|
// Test green
|
||||||
|
orig_pixels[1][0] = 0u;
|
||||||
|
orig_pixels[1][1] = 255u;
|
||||||
|
orig_pixels[1][2] = 0u;
|
||||||
|
orig_pixels[1][3] = 0u;
|
||||||
|
// Test red
|
||||||
|
orig_pixels[2][0] = 0u;
|
||||||
|
orig_pixels[2][1] = 0u;
|
||||||
|
orig_pixels[2][2] = 255u;
|
||||||
|
orig_pixels[2][3] = 255u;
|
||||||
|
// Test color
|
||||||
|
orig_pixels[3][0] = 16u;
|
||||||
|
orig_pixels[3][1] = 64u;
|
||||||
|
orig_pixels[3][2] = 192u;
|
||||||
|
orig_pixels[3][3] = 224u;
|
||||||
|
// Do 16 to test asm version.
|
||||||
|
ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
|
||||||
|
&kRGBToSepia[0], 16, 1);
|
||||||
|
EXPECT_EQ(31u, dst_pixels_opt[0][0]);
|
||||||
|
EXPECT_EQ(43u, dst_pixels_opt[0][1]);
|
||||||
|
EXPECT_EQ(47u, dst_pixels_opt[0][2]);
|
||||||
|
EXPECT_EQ(128u, dst_pixels_opt[0][3]);
|
||||||
|
EXPECT_EQ(135u, dst_pixels_opt[1][0]);
|
||||||
|
EXPECT_EQ(175u, dst_pixels_opt[1][1]);
|
||||||
|
EXPECT_EQ(195u, dst_pixels_opt[1][2]);
|
||||||
|
EXPECT_EQ(0u, dst_pixels_opt[1][3]);
|
||||||
|
EXPECT_EQ(67u, dst_pixels_opt[2][0]);
|
||||||
|
EXPECT_EQ(87u, dst_pixels_opt[2][1]);
|
||||||
|
EXPECT_EQ(99u, dst_pixels_opt[2][2]);
|
||||||
|
EXPECT_EQ(255u, dst_pixels_opt[2][3]);
|
||||||
|
EXPECT_EQ(87u, dst_pixels_opt[3][0]);
|
||||||
|
EXPECT_EQ(112u, dst_pixels_opt[3][1]);
|
||||||
|
EXPECT_EQ(127u, dst_pixels_opt[3][2]);
|
||||||
|
EXPECT_EQ(224u, dst_pixels_opt[3][3]);
|
||||||
|
|
||||||
|
for (int i = 0; i < 1280; ++i) {
|
||||||
|
orig_pixels[i][0] = i;
|
||||||
|
orig_pixels[i][1] = i / 2;
|
||||||
|
orig_pixels[i][2] = i / 3;
|
||||||
|
orig_pixels[i][3] = i;
|
||||||
|
}
|
||||||
|
MaskCpuFlags(0);
|
||||||
|
ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
|
||||||
|
&kRGBToSepia[0], 1280, 1);
|
||||||
|
MaskCpuFlags(-1);
|
||||||
|
|
||||||
|
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
||||||
|
ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
|
||||||
|
&kRGBToSepia[0], 1280, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 1280; ++i) {
|
||||||
|
EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
|
||||||
|
EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
|
||||||
|
EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
|
||||||
|
EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(libyuvTest, TestRGBColorMatrix) {
|
||||||
|
SIMD_ALIGNED(uint8 orig_pixels[1280][4]);
|
||||||
|
|
||||||
|
// Matrix for Sepia.
|
||||||
|
SIMD_ALIGNED(static const int8 kRGBToSepia[]) = {
|
||||||
17, 68, 35, 0,
|
17, 68, 35, 0,
|
||||||
22, 88, 45, 0,
|
22, 88, 45, 0,
|
||||||
24, 98, 50, 0,
|
24, 98, 50, 0,
|
||||||
@ -515,8 +593,8 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
|||||||
orig_pixels[3][2] = 192u;
|
orig_pixels[3][2] = 192u;
|
||||||
orig_pixels[3][3] = 224u;
|
orig_pixels[3][3] = 224u;
|
||||||
// Do 16 to test asm version.
|
// Do 16 to test asm version.
|
||||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 16, 1);
|
RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
|
||||||
EXPECT_EQ(33u, orig_pixels[0][0]);
|
EXPECT_EQ(31u, orig_pixels[0][0]);
|
||||||
EXPECT_EQ(43u, orig_pixels[0][1]);
|
EXPECT_EQ(43u, orig_pixels[0][1]);
|
||||||
EXPECT_EQ(47u, orig_pixels[0][2]);
|
EXPECT_EQ(47u, orig_pixels[0][2]);
|
||||||
EXPECT_EQ(128u, orig_pixels[0][3]);
|
EXPECT_EQ(128u, orig_pixels[0][3]);
|
||||||
@ -524,12 +602,12 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
|||||||
EXPECT_EQ(175u, orig_pixels[1][1]);
|
EXPECT_EQ(175u, orig_pixels[1][1]);
|
||||||
EXPECT_EQ(195u, orig_pixels[1][2]);
|
EXPECT_EQ(195u, orig_pixels[1][2]);
|
||||||
EXPECT_EQ(0u, orig_pixels[1][3]);
|
EXPECT_EQ(0u, orig_pixels[1][3]);
|
||||||
EXPECT_EQ(69u, orig_pixels[2][0]);
|
EXPECT_EQ(67u, orig_pixels[2][0]);
|
||||||
EXPECT_EQ(89u, orig_pixels[2][1]);
|
EXPECT_EQ(87u, orig_pixels[2][1]);
|
||||||
EXPECT_EQ(99u, orig_pixels[2][2]);
|
EXPECT_EQ(99u, orig_pixels[2][2]);
|
||||||
EXPECT_EQ(255u, orig_pixels[2][3]);
|
EXPECT_EQ(255u, orig_pixels[2][3]);
|
||||||
EXPECT_EQ(88u, orig_pixels[3][0]);
|
EXPECT_EQ(87u, orig_pixels[3][0]);
|
||||||
EXPECT_EQ(114u, orig_pixels[3][1]);
|
EXPECT_EQ(112u, orig_pixels[3][1]);
|
||||||
EXPECT_EQ(127u, orig_pixels[3][2]);
|
EXPECT_EQ(127u, orig_pixels[3][2]);
|
||||||
EXPECT_EQ(224u, orig_pixels[3][3]);
|
EXPECT_EQ(224u, orig_pixels[3][3]);
|
||||||
|
|
||||||
@ -540,7 +618,7 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
|
|||||||
orig_pixels[i][3] = i;
|
orig_pixels[i][3] = i;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
|
||||||
ARGBColorMatrix(&orig_pixels[0][0], 0, &kARGBToSepia[0], 0, 0, 1280, 1);
|
RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user