mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBUnattenuateRow_SSE2 use reciprocal table and pmul
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/497001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@244 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
f2c86d01cc
commit
810cd91079
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
|
|
||||||
|
#include <stdio.h> // printf()
|
||||||
#include <string.h> // for memset()
|
#include <string.h> // for memset()
|
||||||
|
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
@ -909,80 +910,6 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Divide source RGB by alpha and store to destination.
|
|
||||||
// b = (b * 255 + (a / 2)) / a;
|
|
||||||
// g = (g * 255 + (a / 2)) / a;
|
|
||||||
// r = (r * 255 + (a / 2)) / a;
|
|
||||||
// Reciprocal method is off by 1 on some values. ie 125
|
|
||||||
// 8.16 fixed point inverse table
|
|
||||||
#define T(a) 0x1000000 / a
|
|
||||||
static uint32 fixed_invtbl[256] = {
|
|
||||||
0, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
|
||||||
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
|
||||||
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
|
||||||
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
|
||||||
T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
|
|
||||||
T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
|
|
||||||
T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
|
|
||||||
T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
|
|
||||||
T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
|
|
||||||
T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
|
|
||||||
T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
|
|
||||||
T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
|
|
||||||
T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
|
|
||||||
T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
|
|
||||||
T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
|
|
||||||
T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
|
|
||||||
T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
|
|
||||||
T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
|
|
||||||
T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
|
|
||||||
T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
|
|
||||||
T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
|
|
||||||
T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
|
|
||||||
T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
|
|
||||||
T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
|
|
||||||
T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
|
|
||||||
T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
|
|
||||||
T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
|
|
||||||
T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
|
|
||||||
T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
|
|
||||||
T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
|
|
||||||
T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
|
|
||||||
T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), T(0xff) };
|
|
||||||
#undef T
|
|
||||||
|
|
||||||
static void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb,
|
|
||||||
int width) {
|
|
||||||
for (int i = 0; i < width; ++i) {
|
|
||||||
uint32 b = src_argb[0];
|
|
||||||
uint32 g = src_argb[1];
|
|
||||||
uint32 r = src_argb[2];
|
|
||||||
const uint32 a = src_argb[3];
|
|
||||||
if (a) {
|
|
||||||
const uint32 ia = fixed_invtbl[a]; // 8.16 fixed point
|
|
||||||
b = (b * ia + 0x8000) >> 16;
|
|
||||||
g = (g * ia + 0x8000) >> 16;
|
|
||||||
r = (r * ia + 0x8000) >> 16;
|
|
||||||
// Clamping should not be necessary but is free in assembly.
|
|
||||||
if (b > 255) {
|
|
||||||
b = 255;
|
|
||||||
}
|
|
||||||
if (g > 255) {
|
|
||||||
g = 255;
|
|
||||||
}
|
|
||||||
if (r > 255) {
|
|
||||||
r = 255;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dst_argb[0] = b;
|
|
||||||
dst_argb[1] = g;
|
|
||||||
dst_argb[2] = r;
|
|
||||||
dst_argb[3] = a;
|
|
||||||
src_argb += 4;
|
|
||||||
dst_argb += 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert preattenuated ARGB values to unattenuated ARGB.
|
// Convert preattenuated ARGB values to unattenuated ARGB.
|
||||||
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
||||||
uint8* dst_argb, int dst_stride_argb,
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
@ -1010,7 +937,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
14
source/row.h
14
source/row.h
@ -67,11 +67,8 @@ extern "C" {
|
|||||||
#define HAS_ARGBBLENDROW_SSE2
|
#define HAS_ARGBBLENDROW_SSE2
|
||||||
#define HAS_ARGBBLENDROW_SSSE3
|
#define HAS_ARGBBLENDROW_SSSE3
|
||||||
#define HAS_ARGBATTENUATE_SSE2
|
#define HAS_ARGBATTENUATE_SSE2
|
||||||
#endif
|
|
||||||
|
|
||||||
// The following are available on Windows 32 bit
|
|
||||||
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
|
|
||||||
#define HAS_ARGBATTENUATE_SSSE3
|
#define HAS_ARGBATTENUATE_SSSE3
|
||||||
|
#define HAS_ARGBUNATTENUATE_SSE2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Neon platforms
|
// The following are available on Neon platforms
|
||||||
@ -312,11 +309,11 @@ void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
|||||||
void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||||
void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
|
||||||
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
||||||
uint8* dst_u, uint8* dst_v, int width);
|
uint8* dst_u, uint8* dst_v, int width);
|
||||||
|
|
||||||
void I420ToARGBRow_Any_NEON(const uint8* y_buf,
|
void I420ToARGBRow_Any_NEON(const uint8* y_buf,
|
||||||
const uint8* u_buf,
|
const uint8* u_buf,
|
||||||
@ -370,6 +367,9 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
|||||||
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|
||||||
|
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -700,6 +700,79 @@ void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Divide source RGB by alpha and store to destination.
|
||||||
|
// b = (b * 255 + (a / 2)) / a;
|
||||||
|
// g = (g * 255 + (a / 2)) / a;
|
||||||
|
// r = (r * 255 + (a / 2)) / a;
|
||||||
|
// Reciprocal method is off by 1 on some values. ie 125
|
||||||
|
// 8.16 fixed point inverse table
|
||||||
|
#define T(a) (0x1000000 / (a))  // 2^24 / a; parenthesized so any argument expression is safe
|
||||||
|
static uint32 fixed_invtbl[256] = {
|
||||||
|
0, T(0x01), T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
||||||
|
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
||||||
|
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
||||||
|
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
||||||
|
T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
|
||||||
|
T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
|
||||||
|
T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
|
||||||
|
T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
|
||||||
|
T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
|
||||||
|
T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
|
||||||
|
T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
|
||||||
|
T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
|
||||||
|
T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
|
||||||
|
T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
|
||||||
|
T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
|
||||||
|
T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
|
||||||
|
T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
|
||||||
|
T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
|
||||||
|
T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
|
||||||
|
T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
|
||||||
|
T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
|
||||||
|
T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
|
||||||
|
T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
|
||||||
|
T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
|
||||||
|
T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
|
||||||
|
T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
|
||||||
|
T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
|
||||||
|
T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
|
||||||
|
T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
|
||||||
|
T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
|
||||||
|
T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
|
||||||
|
T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), T(0xff) };
|
||||||
|
#undef T
|
||||||
|
|
||||||
|
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
|
for (int i = 0; i < width; ++i) {
|
||||||
|
uint32 b = src_argb[0];
|
||||||
|
uint32 g = src_argb[1];
|
||||||
|
uint32 r = src_argb[2];
|
||||||
|
const uint32 a = src_argb[3];
|
||||||
|
if (a) {
|
||||||
|
const uint32 ia = fixed_invtbl[a]; // 8.16 fixed point
|
||||||
|
b = (b * ia + 0x8000) >> 16;
|
||||||
|
g = (g * ia + 0x8000) >> 16;
|
||||||
|
r = (r * ia + 0x8000) >> 16;
|
||||||
|
// Clamping should not be necessary but is free in assembly.
|
||||||
|
if (b > 255) {
|
||||||
|
b = 255;
|
||||||
|
}
|
||||||
|
if (g > 255) {
|
||||||
|
g = 255;
|
||||||
|
}
|
||||||
|
if (r > 255) {
|
||||||
|
r = 255;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst_argb[0] = b;
|
||||||
|
dst_argb[1] = g;
|
||||||
|
dst_argb[2] = r;
|
||||||
|
dst_argb[3] = a;
|
||||||
|
src_argb += 4;
|
||||||
|
dst_argb += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -1730,6 +1730,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
|
|||||||
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
|
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"sub %0,%1 \n"
|
"sub %0,%1 \n"
|
||||||
|
".p2align 4 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa (%0),%%xmm0 \n"
|
||||||
"movdqa 0x10(%0),%%xmm1 \n"
|
"movdqa 0x10(%0),%%xmm1 \n"
|
||||||
@ -2192,9 +2193,9 @@ void ARGBBlendRow_Aligned_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
|||||||
"movdqu 0x10(%0),%%xmm3 \n"
|
"movdqu 0x10(%0),%%xmm3 \n"
|
||||||
"lea 0x20(%0),%0 \n"
|
"lea 0x20(%0),%0 \n"
|
||||||
"psrlw $0x8,%%xmm2 \n"
|
"psrlw $0x8,%%xmm2 \n"
|
||||||
"paddusb %%xmm2,%%xmm0 \n"
|
"paddusb %%xmm2,%%xmm0 \n"
|
||||||
"pand %%xmm5,%%xmm1 \n"
|
"pand %%xmm5,%%xmm1 \n"
|
||||||
"paddusb %%xmm1,%%xmm0 \n"
|
"paddusb %%xmm1,%%xmm0 \n"
|
||||||
"sub $0x4,%3 \n"
|
"sub $0x4,%3 \n"
|
||||||
"movdqa %%xmm0,(%2) \n"
|
"movdqa %%xmm0,(%2) \n"
|
||||||
"jle 9f \n"
|
"jle 9f \n"
|
||||||
@ -2242,6 +2243,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"pslld $0x18,%%xmm4 \n"
|
"pslld $0x18,%%xmm4 \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrld $0x8,%%xmm5 \n"
|
"psrld $0x8,%%xmm5 \n"
|
||||||
|
|
||||||
// 4 pixel loop
|
// 4 pixel loop
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqa (%0),%%xmm0 \n"
|
"movdqa (%0),%%xmm0 \n"
|
||||||
@ -2254,13 +2256,13 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
"pshufhw $0xff,%%xmm1,%%xmm2 \n"
|
"pshufhw $0xff,%%xmm1,%%xmm2 \n"
|
||||||
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
"pshuflw $0xff,%%xmm2,%%xmm2 \n"
|
||||||
"pmulhuw %%xmm2,%%xmm1 \n"
|
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||||
"movdqa (%0),%%xmm3 \n"
|
"movdqa (%0),%%xmm2 \n"
|
||||||
"psrlw $0x8,%%xmm0 \n"
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
"pand %%xmm4,%%xmm3 \n"
|
"pand %%xmm4,%%xmm2 \n"
|
||||||
"psrlw $0x8,%%xmm1 \n"
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
"packuswb %%xmm1,%%xmm0 \n"
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
"pand %%xmm5,%%xmm0 \n"
|
"pand %%xmm5,%%xmm0 \n"
|
||||||
"por %%xmm3,%%xmm0 \n"
|
"por %%xmm2,%%xmm0 \n"
|
||||||
"sub $0x4,%2 \n"
|
"sub $0x4,%2 \n"
|
||||||
"movdqa %%xmm0,(%0,%1,1) \n"
|
"movdqa %%xmm0,(%0,%1,1) \n"
|
||||||
"lea 0x10(%0),%0 \n"
|
"lea 0x10(%0),%0 \n"
|
||||||
@ -2277,6 +2279,156 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBATTENUATE_SSE2
|
#endif // HAS_ARGBATTENUATE_SSE2
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBATTENUATE_SSSE3
|
||||||
|
// Shuffle table duplicating alpha
|
||||||
|
CONST uvec8 kShuffleAlpha0 = {
|
||||||
|
3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
|
||||||
|
};
|
||||||
|
CONST uvec8 kShuffleAlpha1 = {
|
||||||
|
11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
|
||||||
|
15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u,
|
||||||
|
};
|
||||||
|
// Attenuate 4 pixels at a time.
|
||||||
|
// aligned to 16 bytes
|
||||||
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
|
asm volatile (
|
||||||
|
"sub %0,%1 \n"
|
||||||
|
"pcmpeqb %%xmm3,%%xmm3 \n"
|
||||||
|
"pslld $0x18,%%xmm3 \n"
|
||||||
|
"movdqa %3,%%xmm4 \n"
|
||||||
|
"movdqa %4,%%xmm5 \n"
|
||||||
|
|
||||||
|
// 4 pixel loop
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm0 \n"
|
||||||
|
"pshufb %%xmm4,%%xmm0 \n"
|
||||||
|
"movdqa (%0),%%xmm1 \n"
|
||||||
|
"punpcklbw %%xmm1,%%xmm1 \n"
|
||||||
|
"pmulhuw %%xmm1,%%xmm0 \n"
|
||||||
|
"movdqa (%0),%%xmm1 \n"
|
||||||
|
"pshufb %%xmm5,%%xmm1 \n"
|
||||||
|
"movdqa (%0),%%xmm2 \n"
|
||||||
|
"punpckhbw %%xmm2,%%xmm2 \n"
|
||||||
|
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||||
|
"movdqa (%0),%%xmm2 \n"
|
||||||
|
"pand %%xmm3,%%xmm2 \n"
|
||||||
|
"psrlw $0x8,%%xmm0 \n"
|
||||||
|
"psrlw $0x8,%%xmm1 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"por %%xmm2,%%xmm0 \n"
|
||||||
|
"sub $0x4,%2 \n"
|
||||||
|
"movdqa %%xmm0,(%0,%1,1) \n"
|
||||||
|
"lea 0x10(%0),%0 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_argb), // %0
|
||||||
|
"+r"(dst_argb), // %1
|
||||||
|
"+r"(width) // %2
|
||||||
|
: "m"(kShuffleAlpha0), // %3
|
||||||
|
"m"(kShuffleAlpha1) // %4
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBATTENUATE_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBUNATTENUATE_SSE2
|
||||||
|
// Divide source RGB by alpha and store to destination.
|
||||||
|
// b = (b * 255 + (a / 2)) / a;
|
||||||
|
// g = (g * 255 + (a / 2)) / a;
|
||||||
|
// r = (r * 255 + (a / 2)) / a;
|
||||||
|
// Reciprocal method is off by 1 on some values. ie 125
|
||||||
|
// 8.8 fixed point inverse table (16 bit entries, multiplied with pmulhuw)
|
||||||
|
#define T(a) (0x10000 / (a))  // 2^16 / a; parenthesized so any argument expression is safe
|
||||||
|
CONST uint32 fixed_invtbl8[256] = {
|
||||||
|
0x100, 0xffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
||||||
|
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
||||||
|
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
||||||
|
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
||||||
|
T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
|
||||||
|
T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
|
||||||
|
T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
|
||||||
|
T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
|
||||||
|
T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
|
||||||
|
T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
|
||||||
|
T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
|
||||||
|
T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
|
||||||
|
T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
|
||||||
|
T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
|
||||||
|
T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
|
||||||
|
T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
|
||||||
|
T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
|
||||||
|
T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
|
||||||
|
T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
|
||||||
|
T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
|
||||||
|
T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
|
||||||
|
T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
|
||||||
|
T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
|
||||||
|
T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
|
||||||
|
T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
|
||||||
|
T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
|
||||||
|
T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
|
||||||
|
T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
|
||||||
|
T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
|
||||||
|
T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
|
||||||
|
T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
|
||||||
|
T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x100 };
|
||||||
|
#undef T
|
||||||
|
|
||||||
|
// Unattenuate 4 pixels at a time.
|
||||||
|
// aligned to 16 bytes
|
||||||
|
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
int width) {
|
||||||
|
uintptr_t alpha = 0;
|
||||||
|
asm volatile (
|
||||||
|
"sub %0,%1 \n"
|
||||||
|
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||||
|
"pslld $0x18,%%xmm4 \n"
|
||||||
|
|
||||||
|
// 4 pixel loop
|
||||||
|
"1: \n"
|
||||||
|
"movdqa (%0),%%xmm0 \n"
|
||||||
|
"movzb 0x3(%0),%3 \n"
|
||||||
|
"punpcklbw %%xmm0,%%xmm0 \n"
|
||||||
|
"movd 0x0(%4,%3,4),%%xmm2 \n"
|
||||||
|
"movzb 0x7(%0),%3 \n"
|
||||||
|
"movd 0x0(%4,%3,4),%%xmm3 \n"
|
||||||
|
"pshuflw $0xc0,%%xmm2,%%xmm2 \n"
|
||||||
|
"pshuflw $0xc0,%%xmm3,%%xmm3 \n"
|
||||||
|
"movlhps %%xmm3,%%xmm2 \n"
|
||||||
|
"pmulhuw %%xmm2,%%xmm0 \n"
|
||||||
|
"movdqa (%0),%%xmm1 \n"
|
||||||
|
"movzb 0xb(%0),%3 \n"
|
||||||
|
"punpckhbw %%xmm1,%%xmm1 \n"
|
||||||
|
"movd 0x0(%4,%3,4),%%xmm2 \n"
|
||||||
|
"movzb 0xf(%0),%3 \n"
|
||||||
|
"movd 0x0(%4,%3,4),%%xmm3 \n"
|
||||||
|
"pshuflw $0xc0,%%xmm2,%%xmm2 \n"
|
||||||
|
"pshuflw $0xc0,%%xmm3,%%xmm3 \n"
|
||||||
|
"movlhps %%xmm3,%%xmm2 \n"
|
||||||
|
"pmulhuw %%xmm2,%%xmm1 \n"
|
||||||
|
"movdqa (%0),%%xmm2 \n"
|
||||||
|
"pand %%xmm4,%%xmm2 \n"
|
||||||
|
"packuswb %%xmm1,%%xmm0 \n"
|
||||||
|
"por %%xmm2,%%xmm0 \n"
|
||||||
|
"sub $0x4,%2 \n"
|
||||||
|
"movdqa %%xmm0,(%0,%1,1) \n"
|
||||||
|
"lea 0x10(%0),%0 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
: "+r"(src_argb), // %0
|
||||||
|
"+r"(dst_argb), // %1
|
||||||
|
"+r"(width), // %2
|
||||||
|
"+r"(alpha) // %3
|
||||||
|
: "r"(fixed_invtbl8) // %4
|
||||||
|
: "memory", "cc"
|
||||||
|
#if defined(__SSE2__)
|
||||||
|
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBUNATTENUATE_SSE2
|
||||||
|
|
||||||
#endif // defined(__x86_64__) || defined(__i386__)
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@ -2319,13 +2319,13 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
pshufhw xmm2, xmm1,0FFh // 8 alpha words
|
pshufhw xmm2, xmm1,0FFh // 8 alpha words
|
||||||
pshuflw xmm2, xmm2,0FFh
|
pshuflw xmm2, xmm2,0FFh
|
||||||
pmulhuw xmm1, xmm2 // rgb * a
|
pmulhuw xmm1, xmm2 // rgb * a
|
||||||
movdqa xmm3, [eax] // alphas
|
movdqa xmm2, [eax] // alphas
|
||||||
psrlw xmm0, 8
|
psrlw xmm0, 8
|
||||||
pand xmm3, xmm4
|
pand xmm2, xmm4
|
||||||
psrlw xmm1, 8
|
psrlw xmm1, 8
|
||||||
packuswb xmm0, xmm1
|
packuswb xmm0, xmm1
|
||||||
pand xmm0, xmm5 // keep original alphas
|
pand xmm0, xmm5 // keep original alphas
|
||||||
por xmm0, xmm3
|
por xmm0, xmm2
|
||||||
sub ecx, 4
|
sub ecx, 4
|
||||||
movdqa [eax + edx], xmm0
|
movdqa [eax + edx], xmm0
|
||||||
lea eax, [eax + 16]
|
lea eax, [eax + 16]
|
||||||
@ -2347,7 +2347,6 @@ static const uvec8 kShuffleAlpha1 = {
|
|||||||
};
|
};
|
||||||
__declspec(naked) __declspec(align(16))
|
__declspec(naked) __declspec(align(16))
|
||||||
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||||
__asm {
|
|
||||||
mov eax, [esp + 4] // src_argb0
|
mov eax, [esp + 4] // src_argb0
|
||||||
mov edx, [esp + 8] // dst_argb
|
mov edx, [esp + 8] // dst_argb
|
||||||
mov ecx, [esp + 12] // width
|
mov ecx, [esp + 12] // width
|
||||||
@ -2360,7 +2359,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
align 16
|
align 16
|
||||||
convertloop:
|
convertloop:
|
||||||
movdqa xmm0, [eax] // read 4 pixels
|
movdqa xmm0, [eax] // read 4 pixels
|
||||||
pshufb xmm0, xmm4 // isolate first 2 alphas
|
pshufb xmm0, xmm4 // isolate first 2 alphas
|
||||||
movdqa xmm1, [eax] // read 4 pixels
|
movdqa xmm1, [eax] // read 4 pixels
|
||||||
punpcklbw xmm1, xmm1 // first 2 pixel rgbs
|
punpcklbw xmm1, xmm1 // first 2 pixel rgbs
|
||||||
pmulhuw xmm0, xmm1 // rgb * a
|
pmulhuw xmm0, xmm1 // rgb * a
|
||||||
@ -2383,9 +2382,105 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
|
|||||||
ret
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // HAS_ARGBATTENUATE_SSSE3
|
#endif // HAS_ARGBATTENUATE_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBUNATTENUATE_SSE2
|
||||||
|
// Divide source RGB by alpha and store to destination.
|
||||||
|
// b = (b * 255 + (a / 2)) / a;
|
||||||
|
// g = (g * 255 + (a / 2)) / a;
|
||||||
|
// r = (r * 255 + (a / 2)) / a;
|
||||||
|
// Reciprocal method is off by 1 on some values. ie 125
|
||||||
|
// 8.8 fixed point inverse table (16 bit entries, multiplied with pmulhuw)
|
||||||
|
#define T(a) (0x10000 / (a))  // 2^16 / a; parenthesized so any argument expression is safe
|
||||||
|
static uint32 fixed_invtbl8[256] = {
|
||||||
|
0x100, 0xffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
|
||||||
|
T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
|
||||||
|
T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
|
||||||
|
T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
|
||||||
|
T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
|
||||||
|
T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
|
||||||
|
T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
|
||||||
|
T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
|
||||||
|
T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
|
||||||
|
T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
|
||||||
|
T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
|
||||||
|
T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
|
||||||
|
T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
|
||||||
|
T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
|
||||||
|
T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
|
||||||
|
T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
|
||||||
|
T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
|
||||||
|
T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
|
||||||
|
T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
|
||||||
|
T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
|
||||||
|
T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
|
||||||
|
T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
|
||||||
|
T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
|
||||||
|
T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
|
||||||
|
T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
|
||||||
|
T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
|
||||||
|
T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
|
||||||
|
T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
|
||||||
|
T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
|
||||||
|
T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
|
||||||
|
T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
|
||||||
|
T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x100 };
|
||||||
|
#undef T
|
||||||
|
|
||||||
|
// Unattenuate 4 pixels at a time.
|
||||||
|
// aligned to 16 bytes
|
||||||
|
__declspec(naked) __declspec(align(16))
|
||||||
|
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
|
||||||
|
int width) {
|
||||||
|
__asm {
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
mov eax, [esp + 8 + 4] // src_argb0
|
||||||
|
mov edx, [esp + 8 + 8] // dst_argb
|
||||||
|
mov ecx, [esp + 8 + 12] // width
|
||||||
|
sub edx, eax
|
||||||
|
pcmpeqb xmm4, xmm4 // generate mask 0xff000000
|
||||||
|
pslld xmm4, 24
|
||||||
|
|
||||||
|
align 16
|
||||||
|
convertloop:
|
||||||
|
movdqa xmm0, [eax] // read 4 pixels
|
||||||
|
movzx esi, byte ptr [eax + 3] // first alpha
|
||||||
|
movzx edi, byte ptr [eax + 7] // second alpha
|
||||||
|
punpcklbw xmm0, xmm0 // first 2
|
||||||
|
movd xmm2, dword ptr fixed_invtbl8[esi * 4]
|
||||||
|
movd xmm3, dword ptr fixed_invtbl8[edi * 4]
|
||||||
|
pshuflw xmm2, xmm2,0C0h // first 4 inv_alpha words
|
||||||
|
pshuflw xmm3, xmm3,0C0h // next 4 inv_alpha words
|
||||||
|
movlhps xmm2, xmm3
|
||||||
|
pmulhuw    xmm0, xmm2  // rgb * inv_alpha
|
||||||
|
|
||||||
|
movdqa xmm1, [eax] // read 4 pixels
|
||||||
|
movzx esi, byte ptr [eax + 11] // third alpha
|
||||||
|
movzx      edi, byte ptr [eax + 15]  // fourth alpha
|
||||||
|
punpckhbw xmm1, xmm1 // next 2
|
||||||
|
movd xmm2, dword ptr fixed_invtbl8[esi * 4]
|
||||||
|
movd xmm3, dword ptr fixed_invtbl8[edi * 4]
|
||||||
|
pshuflw xmm2, xmm2,0C0h // first 4 inv_alpha words
|
||||||
|
pshuflw xmm3, xmm3,0C0h // next 4 inv_alpha words
|
||||||
|
movlhps xmm2, xmm3
|
||||||
|
pmulhuw    xmm1, xmm2  // rgb * inv_alpha
|
||||||
|
|
||||||
|
movdqa xmm2, [eax] // alphas
|
||||||
|
pand xmm2, xmm4
|
||||||
|
packuswb xmm0, xmm1
|
||||||
|
por xmm0, xmm2
|
||||||
|
sub ecx, 4
|
||||||
|
movdqa [eax + edx], xmm0
|
||||||
|
lea eax, [eax + 16]
|
||||||
|
jg convertloop
|
||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // HAS_ARGBUNATTENUATE_SSE2
|
||||||
|
|
||||||
#endif // _M_IX86
|
#endif // _M_IX86
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@ -63,7 +63,7 @@ TEST_F(libyuvTest, BenchmakDjb2_C) {
|
|||||||
uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
|
uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
|
||||||
uint32 h1;
|
uint32 h1;
|
||||||
MaskCpuFlags(kCpuInitialized);
|
MaskCpuFlags(kCpuInitialized);
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
h1 = HashDjb2(src_a, kMaxTest, 5381);
|
h1 = HashDjb2(src_a, kMaxTest, 5381);
|
||||||
}
|
}
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
@ -80,7 +80,7 @@ TEST_F(libyuvTest, BenchmakDjb2_OPT) {
|
|||||||
}
|
}
|
||||||
uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
|
uint32 h2 = ReferenceHashDjb2(src_a, kMaxTest, 5381);
|
||||||
uint32 h1;
|
uint32 h1;
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
h1 = HashDjb2(src_a, kMaxTest, 5381);
|
h1 = HashDjb2(src_a, kMaxTest, 5381);
|
||||||
}
|
}
|
||||||
EXPECT_EQ(h1, h2);
|
EXPECT_EQ(h1, h2);
|
||||||
@ -96,7 +96,7 @@ TEST_F(libyuvTest, BenchmakDjb2_Unaligned_OPT) {
|
|||||||
}
|
}
|
||||||
uint32 h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381);
|
uint32 h2 = ReferenceHashDjb2(src_a + 1, kMaxTest, 5381);
|
||||||
uint32 h1;
|
uint32 h1;
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
h1 = HashDjb2(src_a + 1, kMaxTest, 5381);
|
h1 = HashDjb2(src_a + 1, kMaxTest, 5381);
|
||||||
}
|
}
|
||||||
EXPECT_EQ(h1, h2);
|
EXPECT_EQ(h1, h2);
|
||||||
@ -110,7 +110,7 @@ TEST_F(libyuvTest, BenchmarkSumSquareError_C) {
|
|||||||
align_buffer_16(src_b, max_width)
|
align_buffer_16(src_b, max_width)
|
||||||
|
|
||||||
MaskCpuFlags(kCpuInitialized);
|
MaskCpuFlags(kCpuInitialized);
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
ComputeSumSquareError(src_a, src_b, max_width);
|
ComputeSumSquareError(src_a, src_b, max_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,7 +128,7 @@ TEST_F(libyuvTest, BenchmarkSumSquareError_OPT) {
|
|||||||
align_buffer_16(src_a, max_width)
|
align_buffer_16(src_a, max_width)
|
||||||
align_buffer_16(src_b, max_width)
|
align_buffer_16(src_b, max_width)
|
||||||
|
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
ComputeSumSquareError(src_a, src_b, max_width);
|
ComputeSumSquareError(src_a, src_b, max_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,18 +183,18 @@ TEST_F(libyuvTest, SumSquareError) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, BenchmarkPsnr_C) {
|
TEST_F(libyuvTest, BenchmarkPsnr_C) {
|
||||||
align_buffer_16(src_a, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
|
||||||
align_buffer_16(src_b, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
|
||||||
|
|
||||||
MaskCpuFlags(kCpuInitialized);
|
MaskCpuFlags(kCpuInitialized);
|
||||||
|
|
||||||
double c_time = get_time();
|
double c_time = get_time();
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
CalcFramePsnr(src_a, _benchmark_width,
|
CalcFramePsnr(src_a, benchmark_width_,
|
||||||
src_b, _benchmark_width,
|
src_b, benchmark_width_,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
c_time = (get_time() - c_time) / _benchmark_iterations;
|
c_time = (get_time() - c_time) / benchmark_iterations_;
|
||||||
printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6);
|
printf("BenchmarkPsnr_C - %8.2f us c\n", c_time * 1e6);
|
||||||
|
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
@ -206,18 +206,18 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
|
TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
|
||||||
align_buffer_16(src_a, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
|
||||||
align_buffer_16(src_b, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
|
||||||
|
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
|
|
||||||
double opt_time = get_time();
|
double opt_time = get_time();
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
CalcFramePsnr(src_a, _benchmark_width,
|
CalcFramePsnr(src_a, benchmark_width_,
|
||||||
src_b, _benchmark_width,
|
src_b, benchmark_width_,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
opt_time = (get_time() - opt_time) / _benchmark_iterations;
|
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
||||||
printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);
|
printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);
|
||||||
|
|
||||||
EXPECT_EQ(0, 0);
|
EXPECT_EQ(0, 0);
|
||||||
@ -304,18 +304,18 @@ TEST_F(libyuvTest, Psnr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, BenchmarkSsim_C) {
|
TEST_F(libyuvTest, BenchmarkSsim_C) {
|
||||||
align_buffer_16(src_a, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
|
||||||
align_buffer_16(src_b, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
|
||||||
|
|
||||||
MaskCpuFlags(kCpuInitialized);
|
MaskCpuFlags(kCpuInitialized);
|
||||||
|
|
||||||
double c_time = get_time();
|
double c_time = get_time();
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
CalcFrameSsim(src_a, _benchmark_width,
|
CalcFrameSsim(src_a, benchmark_width_,
|
||||||
src_b, _benchmark_width,
|
src_b, benchmark_width_,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
c_time = (get_time() - c_time) / _benchmark_iterations;
|
c_time = (get_time() - c_time) / benchmark_iterations_;
|
||||||
printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6);
|
printf("BenchmarkSsim_C - %8.2f us c\n", c_time * 1e6);
|
||||||
|
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
@ -327,18 +327,18 @@ TEST_F(libyuvTest, BenchmarkSsim_C) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(libyuvTest, BenchmarkSsim_OPT) {
|
TEST_F(libyuvTest, BenchmarkSsim_OPT) {
|
||||||
align_buffer_16(src_a, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
|
||||||
align_buffer_16(src_b, _benchmark_width * _benchmark_height)
|
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
|
||||||
|
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
|
|
||||||
double opt_time = get_time();
|
double opt_time = get_time();
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
CalcFrameSsim(src_a, _benchmark_width,
|
CalcFrameSsim(src_a, benchmark_width_,
|
||||||
src_b, _benchmark_width,
|
src_b, benchmark_width_,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
opt_time = (get_time() - opt_time) / _benchmark_iterations;
|
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
||||||
printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);
|
printf("BenchmarkPsnr_OPT - %8.2f us opt\n", opt_time * 1e6);
|
||||||
|
|
||||||
EXPECT_EQ(0, 0);
|
EXPECT_EQ(0, 0);
|
||||||
|
|||||||
@ -8,14 +8,13 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "unit_test.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "libyuv/basic_types.h"
|
#include "libyuv/basic_types.h"
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/version.h"
|
#include "libyuv/version.h"
|
||||||
|
#include "unit_test/unit_test.h"
|
||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
|
|||||||
@ -8,8 +8,6 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "unit_test.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
@ -17,6 +15,7 @@
|
|||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
#include "libyuv/rotate.h"
|
#include "libyuv/rotate.h"
|
||||||
|
#include "unit_test/unit_test.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define SIMD_ALIGNED(var) __declspec(align(16)) var
|
#define SIMD_ALIGNED(var) __declspec(align(16)) var
|
||||||
@ -26,20 +25,20 @@
|
|||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
TEST_F (libyuvTest, BenchmarkI420ToARGB_C) {
|
TEST_F(libyuvTest, BenchmarkI420ToARGB_C) {
|
||||||
align_buffer_16(src_y, _benchmark_width * _benchmark_height);
|
align_buffer_16(src_y, benchmark_width_ * benchmark_height_);
|
||||||
align_buffer_16(src_u, ((_benchmark_width * _benchmark_height) >> 2));
|
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
|
||||||
align_buffer_16(src_v, ((_benchmark_width * _benchmark_height) >> 2));
|
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
|
||||||
align_buffer_16(dst_argb, ((_benchmark_width << 2) * _benchmark_height));
|
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
|
||||||
|
|
||||||
MaskCpuFlags(kCpuInitialized);
|
MaskCpuFlags(kCpuInitialized);
|
||||||
|
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
I420ToARGB(src_y, _benchmark_width,
|
I420ToARGB(src_y, benchmark_width_,
|
||||||
src_u, _benchmark_width >> 1,
|
src_u, benchmark_width_ >> 1,
|
||||||
src_v, _benchmark_width >> 1,
|
src_v, benchmark_width_ >> 1,
|
||||||
dst_argb, _benchmark_width << 2,
|
dst_argb, benchmark_width_ << 2,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
MaskCpuFlags(-1);
|
MaskCpuFlags(-1);
|
||||||
|
|
||||||
@ -51,18 +50,18 @@ TEST_F (libyuvTest, BenchmarkI420ToARGB_C) {
|
|||||||
free_aligned_buffer_16(dst_argb)
|
free_aligned_buffer_16(dst_argb)
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F (libyuvTest, BenchmarkI420ToARGB_OPT) {
|
TEST_F(libyuvTest, BenchmarkI420ToARGB_OPT) {
|
||||||
align_buffer_16(src_y, _benchmark_width * _benchmark_height);
|
align_buffer_16(src_y, benchmark_width_ * benchmark_height_);
|
||||||
align_buffer_16(src_u, (_benchmark_width * _benchmark_height) >> 2);
|
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
|
||||||
align_buffer_16(src_v, (_benchmark_width * _benchmark_height) >> 2);
|
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
|
||||||
align_buffer_16(dst_argb, (_benchmark_width << 2) * _benchmark_height);
|
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
|
||||||
|
|
||||||
for (int i = 0; i < _benchmark_iterations; ++i)
|
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||||
I420ToARGB(src_y, _benchmark_width,
|
I420ToARGB(src_y, benchmark_width_,
|
||||||
src_u, _benchmark_width >> 1,
|
src_u, benchmark_width_ >> 1,
|
||||||
src_v, _benchmark_width >> 1,
|
src_v, benchmark_width_ >> 1,
|
||||||
dst_argb, _benchmark_width << 2,
|
dst_argb, benchmark_width_ << 2,
|
||||||
_benchmark_width, _benchmark_height);
|
benchmark_width_, benchmark_height_);
|
||||||
|
|
||||||
free_aligned_buffer_16(src_y)
|
free_aligned_buffer_16(src_y)
|
||||||
free_aligned_buffer_16(src_u)
|
free_aligned_buffer_16(src_u)
|
||||||
@ -71,7 +70,7 @@ TEST_F (libyuvTest, BenchmarkI420ToARGB_OPT) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define TESTI420TO(FMT) \
|
#define TESTI420TO(FMT) \
|
||||||
TEST_F (libyuvTest, I420To##FMT##_CvsOPT) { \
|
TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \
|
||||||
const int src_width = 1280; \
|
const int src_width = 1280; \
|
||||||
const int src_height = 720; \
|
const int src_height = 720; \
|
||||||
align_buffer_16(src_y, src_width * src_height); \
|
align_buffer_16(src_y, src_width * src_height); \
|
||||||
@ -103,8 +102,8 @@ TEST_F (libyuvTest, I420To##FMT##_CvsOPT) { \
|
|||||||
int err = 0; \
|
int err = 0; \
|
||||||
for (int i = 0; i < src_height; ++i) { \
|
for (int i = 0; i < src_height; ++i) { \
|
||||||
for (int j = 0; j < src_width << 2; ++j) { \
|
for (int j = 0; j < src_width << 2; ++j) { \
|
||||||
int diff = (int)(dst_rgb_c[i * src_height + j]) - \
|
int diff = static_cast<int>(dst_rgb_c[i * src_height + j]) - \
|
||||||
(int)(dst_rgb_opt[i * src_height + j]); \
|
static_cast<int>(dst_rgb_opt[i * src_height + j]); \
|
||||||
if (abs(diff) > 2) \
|
if (abs(diff) > 2) \
|
||||||
err++; \
|
err++; \
|
||||||
} \
|
} \
|
||||||
@ -121,11 +120,48 @@ TESTI420TO(ARGB)
|
|||||||
TESTI420TO(BGRA)
|
TESTI420TO(BGRA)
|
||||||
TESTI420TO(ABGR)
|
TESTI420TO(ABGR)
|
||||||
|
|
||||||
TEST_F (libyuvTest, TestAttenuate) {
|
TEST_F(libyuvTest, TestAttenuate) {
|
||||||
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
|
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
|
||||||
SIMD_ALIGNED(uint8 atten_pixels[256][4]);
|
SIMD_ALIGNED(uint8 atten_pixels[256][4]);
|
||||||
SIMD_ALIGNED(uint8 unatten_pixels[256][4]);
|
SIMD_ALIGNED(uint8 unatten_pixels[256][4]);
|
||||||
SIMD_ALIGNED(uint8 atten2_pixels[256][4]);
|
SIMD_ALIGNED(uint8 atten2_pixels[256][4]);
|
||||||
|
|
||||||
|
// Test unattenuation clamps
|
||||||
|
orig_pixels[0][0] = 200u;
|
||||||
|
orig_pixels[0][1] = 129u;
|
||||||
|
orig_pixels[0][2] = 127u;
|
||||||
|
orig_pixels[0][3] = 128u;
|
||||||
|
// Test unattenuation transparent and opaque are unaffected
|
||||||
|
orig_pixels[1][0] = 16u;
|
||||||
|
orig_pixels[1][1] = 64u;
|
||||||
|
orig_pixels[1][2] = 192u;
|
||||||
|
orig_pixels[1][3] = 0u;
|
||||||
|
orig_pixels[2][0] = 16u;
|
||||||
|
orig_pixels[2][1] = 64u;
|
||||||
|
orig_pixels[2][2] = 192u;
|
||||||
|
orig_pixels[2][3] = 255u;
|
||||||
|
orig_pixels[3][0] = 16u;
|
||||||
|
orig_pixels[3][1] = 64u;
|
||||||
|
orig_pixels[3][2] = 192u;
|
||||||
|
orig_pixels[3][3] = 128u;
|
||||||
|
ARGBUnattenuate(&orig_pixels[0][0], 0, &unatten_pixels[0][0], 0, 4, 1);
|
||||||
|
EXPECT_EQ(255u, unatten_pixels[0][0]);
|
||||||
|
EXPECT_EQ(255u, unatten_pixels[0][1]);
|
||||||
|
EXPECT_EQ(254u, unatten_pixels[0][2]);
|
||||||
|
EXPECT_EQ(128u, unatten_pixels[0][3]);
|
||||||
|
EXPECT_EQ(16u, unatten_pixels[1][0]);
|
||||||
|
EXPECT_EQ(64u, unatten_pixels[1][1]);
|
||||||
|
EXPECT_EQ(192u, unatten_pixels[1][2]);
|
||||||
|
EXPECT_EQ(0u, unatten_pixels[1][3]);
|
||||||
|
EXPECT_EQ(16u, unatten_pixels[2][0]);
|
||||||
|
EXPECT_EQ(64u, unatten_pixels[2][1]);
|
||||||
|
EXPECT_EQ(192u, unatten_pixels[2][2]);
|
||||||
|
EXPECT_EQ(255u, unatten_pixels[2][3]);
|
||||||
|
EXPECT_EQ(32u, unatten_pixels[3][0]);
|
||||||
|
EXPECT_EQ(128u, unatten_pixels[3][1]);
|
||||||
|
EXPECT_EQ(255u, unatten_pixels[3][2]);
|
||||||
|
EXPECT_EQ(128u, unatten_pixels[3][3]);
|
||||||
|
|
||||||
for (int i = 0; i < 256; ++i) {
|
for (int i = 0; i < 256; ++i) {
|
||||||
orig_pixels[i][0] = i;
|
orig_pixels[i][0] = i;
|
||||||
orig_pixels[i][1] = i / 2;
|
orig_pixels[i][1] = i / 2;
|
||||||
@ -156,17 +192,5 @@ TEST_F (libyuvTest, TestAttenuate) {
|
|||||||
EXPECT_EQ(127, atten_pixels[255][1]);
|
EXPECT_EQ(127, atten_pixels[255][1]);
|
||||||
EXPECT_EQ(85, atten_pixels[255][2]);
|
EXPECT_EQ(85, atten_pixels[255][2]);
|
||||||
EXPECT_EQ(255, atten_pixels[255][3]);
|
EXPECT_EQ(255, atten_pixels[255][3]);
|
||||||
|
|
||||||
// Test unattenuation clamps
|
|
||||||
orig_pixels[0][0] = 200;
|
|
||||||
orig_pixels[0][1] = 129;
|
|
||||||
orig_pixels[0][2] = 127;
|
|
||||||
orig_pixels[0][3] = 128;
|
|
||||||
ARGBUnattenuate(&orig_pixels[0][0], 0, &unatten_pixels[0][0], 0, 1, 1);
|
|
||||||
EXPECT_EQ(255, unatten_pixels[0][0]);
|
|
||||||
EXPECT_EQ(255, unatten_pixels[0][1]);
|
|
||||||
EXPECT_EQ(254, unatten_pixels[0][2]);
|
|
||||||
EXPECT_EQ(128, unatten_pixels[0][3]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -8,13 +8,12 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "unit_test.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include "libyuv/rotate.h"
|
|
||||||
#include "../source/rotate_priv.h"
|
#include "../source/rotate_priv.h"
|
||||||
|
#include "libyuv/rotate.h"
|
||||||
|
#include "unit_test/unit_test.h"
|
||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
@ -33,8 +32,8 @@ TEST_F(libyuvTest, Transpose) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 8; iw < _rotate_max_w && !err; ++iw)
|
for (iw = 8; iw < rotate_max_w_ && !err; ++iw)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
ow = ih;
|
ow = ih;
|
||||||
oh = iw;
|
oh = iw;
|
||||||
@ -77,8 +76,8 @@ TEST_F(libyuvTest, TransposeUV) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 2)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 2)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -134,8 +133,8 @@ TEST_F(libyuvTest, RotatePlane90) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 8; iw < _rotate_max_w && !err; ++iw)
|
for (iw = 8; iw < rotate_max_w_ && !err; ++iw)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -191,8 +190,8 @@ TEST_F(libyuvTest, RotateUV90) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 2)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 2)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -265,8 +264,8 @@ TEST_F(libyuvTest, RotateUV180) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 2)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 2)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = iw >> 1;
|
ow = iw >> 1;
|
||||||
@ -339,8 +338,8 @@ TEST_F(libyuvTest, RotateUV270) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 2)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 2)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -414,8 +413,8 @@ TEST_F(libyuvTest, RotatePlane180) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 8; iw < _rotate_max_w && !err; ++iw)
|
for (iw = 8; iw < rotate_max_w_ && !err; ++iw)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = iw;
|
ow = iw;
|
||||||
@ -459,8 +458,8 @@ TEST_F(libyuvTest, RotatePlane270) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 8; iw < _rotate_max_w && !err; ++iw)
|
for (iw = 8; iw < rotate_max_w_ && !err; ++iw)
|
||||||
for (ih = 8; ih < _rotate_max_h && !err; ++ih) {
|
for (ih = 8; ih < rotate_max_h_ && !err; ++ih) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -516,8 +515,8 @@ TEST_F(libyuvTest, RotatePlane90and270) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 4)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 4)
|
||||||
for (ih = 16; ih < _rotate_max_h && !err; ih += 4) {
|
for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
@ -561,8 +560,8 @@ TEST_F(libyuvTest, RotatePlane90Pitch) {
|
|||||||
int iw, ih;
|
int iw, ih;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 4)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 4)
|
||||||
for (ih = 16; ih < _rotate_max_h && !err; ih += 4) {
|
for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
int ow = ih;
|
int ow = ih;
|
||||||
@ -618,8 +617,8 @@ TEST_F(libyuvTest, RotatePlane270Pitch) {
|
|||||||
int iw, ih, ow, oh;
|
int iw, ih, ow, oh;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
for (iw = 16; iw < _rotate_max_w && !err; iw += 4)
|
for (iw = 16; iw < rotate_max_w_ && !err; iw += 4)
|
||||||
for (ih = 16; ih < _rotate_max_h && !err; ih += 4) {
|
for (ih = 16; ih < rotate_max_h_ && !err; ih += 4) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ow = ih;
|
ow = ih;
|
||||||
|
|||||||
@ -8,13 +8,12 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "unit_test.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/scale.h"
|
#include "libyuv/scale.h"
|
||||||
|
#include "unit_test/unit_test.h"
|
||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
|
|||||||
@ -8,15 +8,13 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cstring>
|
#include "unit_test/unit_test.h"
|
||||||
#include "unit_test.h"
|
|
||||||
|
|
||||||
libyuvTest::libyuvTest() :
|
#include <cstring>
|
||||||
_rotate_max_w(128),
|
|
||||||
_rotate_max_h(128),
|
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
|
||||||
_benchmark_iterations(1000),
|
benchmark_iterations_(1000), benchmark_width_(1280),
|
||||||
_benchmark_width(1280),
|
benchmark_height_(720) {
|
||||||
_benchmark_height(720) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|||||||
@ -8,17 +8,17 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef UINIT_TEST_H_
|
#ifndef UNIT_TEST_UNIT_TEST_H_
|
||||||
#define UINIT_TEST_H_
|
#define UNIT_TEST_UNIT_TEST_H_
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#define align_buffer_16(var, size) \
|
#define align_buffer_16(var, size) \
|
||||||
uint8 *var; \
|
uint8* var; \
|
||||||
uint8 *var##_mem; \
|
uint8* var##_mem; \
|
||||||
var##_mem = reinterpret_cast<uint8*>(calloc((size)+15, sizeof(uint8))); \
|
var##_mem = reinterpret_cast<uint8*>(calloc((size) + 15, sizeof(uint8))); \
|
||||||
var = reinterpret_cast<uint8*> \
|
var = reinterpret_cast<uint8*> \
|
||||||
((reinterpret_cast<intptr_t>(var##_mem) + 15) & (~0x0f));
|
((reinterpret_cast<intptr_t>(var##_mem) + 15) & (~0x0f)); \
|
||||||
|
|
||||||
#define free_aligned_buffer_16(var) \
|
#define free_aligned_buffer_16(var) \
|
||||||
free(var##_mem); \
|
free(var##_mem); \
|
||||||
@ -27,12 +27,11 @@
|
|||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
static double get_time()
|
static double get_time() {
|
||||||
{
|
LARGE_INTEGER t, f;
|
||||||
LARGE_INTEGER t, f;
|
QueryPerformanceCounter(&t);
|
||||||
QueryPerformanceCounter(&t);
|
QueryPerformanceFrequency(&f);
|
||||||
QueryPerformanceFrequency(&f);
|
return static_cast<double>(t.QuadPart) / static_cast<double>(f.QuadPart);
|
||||||
return double(t.QuadPart)/double(f.QuadPart);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define random rand
|
#define random rand
|
||||||
@ -46,7 +45,7 @@ static double get_time() {
|
|||||||
struct timeval t;
|
struct timeval t;
|
||||||
struct timezone tzp;
|
struct timezone tzp;
|
||||||
gettimeofday(&t, &tzp);
|
gettimeofday(&t, &tzp);
|
||||||
return t.tv_sec + t.tv_usec*1e-6;
|
return t.tv_sec + t.tv_usec * 1e-6;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -55,13 +54,12 @@ class libyuvTest : public ::testing::Test {
|
|||||||
protected:
|
protected:
|
||||||
libyuvTest();
|
libyuvTest();
|
||||||
|
|
||||||
const int _rotate_max_w;
|
const int rotate_max_w_;
|
||||||
const int _rotate_max_h;
|
const int rotate_max_h_;
|
||||||
|
|
||||||
const int _benchmark_iterations;
|
|
||||||
const int _benchmark_width;
|
|
||||||
const int _benchmark_height;
|
|
||||||
|
|
||||||
|
const int benchmark_iterations_;
|
||||||
|
const int benchmark_width_;
|
||||||
|
const int benchmark_height_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // UNIT_TEST_H_
|
#endif // UNIT_TEST_UNIT_TEST_H_
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user