mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
AVX2 YUV alpha blender and improved unittests
The AVX2 version processes 16 pixels at a time for improved memory bandwidth and fewer instructions. Unit tests are improved to test unaligned memory and to test exactness when alpha is 0 or 255. R=dhrosa@google.com, harryjin@google.com BUG=libyuv:527 Review URL: https://codereview.chromium.org/1505433002
This commit is contained in:
parent
fa2618ee26
commit
bea690b3e0
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1547
|
Version: 1548
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -302,6 +302,7 @@ LIBYUV_API
|
|||||||
ARGBBlendRow GetARGBBlend();
|
ARGBBlendRow GetARGBBlend();
|
||||||
|
|
||||||
// Alpha Blend ARGB images and store to destination.
|
// Alpha Blend ARGB images and store to destination.
|
||||||
|
// Source is pre-multiplied by alpha using ARGBAttenuate.
|
||||||
// Alpha of destination is set to 255.
|
// Alpha of destination is set to 255.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
||||||
@ -309,6 +310,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
|||||||
uint8* dst_argb, int dst_stride_argb,
|
uint8* dst_argb, int dst_stride_argb,
|
||||||
int width, int height);
|
int width, int height);
|
||||||
|
|
||||||
|
// Alpha Blend plane and store to destination.
|
||||||
|
// Source is not pre-multiplied by alpha.
|
||||||
|
LIBYUV_API
|
||||||
|
int BlendPlane(const uint8* src_y0, int src_stride_y0,
|
||||||
|
const uint8* src_y1, int src_stride_y1,
|
||||||
|
const uint8* alpha, int alpha_stride,
|
||||||
|
uint8* dst_y, int dst_stride_y,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
|
// Alpha Blend YUV images and store to destination.
|
||||||
|
// Source is not pre-multiplied by alpha.
|
||||||
|
// Alpha is full width x height and subsampled to half size to apply to UV.
|
||||||
|
LIBYUV_API
|
||||||
|
int I420Blend(const uint8* src_y0, int src_stride_y0,
|
||||||
|
const uint8* src_u0, int src_stride_u0,
|
||||||
|
const uint8* src_v0, int src_stride_v0,
|
||||||
|
const uint8* src_y1, int src_stride_y1,
|
||||||
|
const uint8* src_u1, int src_stride_u1,
|
||||||
|
const uint8* src_v1, int src_stride_v1,
|
||||||
|
const uint8* alpha, int alpha_stride,
|
||||||
|
uint8* dst_y, int dst_stride_y,
|
||||||
|
uint8* dst_u, int dst_stride_u,
|
||||||
|
uint8* dst_v, int dst_stride_v,
|
||||||
|
int width, int height);
|
||||||
|
|
||||||
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
|
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
||||||
|
|||||||
@ -233,6 +233,7 @@ extern "C" {
|
|||||||
#define HAS_ARGBMULTIPLYROW_AVX2
|
#define HAS_ARGBMULTIPLYROW_AVX2
|
||||||
#define HAS_ARGBSUBTRACTROW_AVX2
|
#define HAS_ARGBSUBTRACTROW_AVX2
|
||||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||||
|
#define HAS_BLENDPLANEROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
// The following are available for AVX2 Visual C and clangcl 32 bit:
|
||||||
@ -253,12 +254,6 @@ extern "C" {
|
|||||||
#define HAS_RGB565TOARGBROW_AVX2
|
#define HAS_RGB565TOARGBROW_AVX2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for 32 bit Visual C and clangcl 32 bit:
|
|
||||||
// TODO(fbarchard): Port to gcc.
|
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
|
||||||
#define HAS_BLENDPLANEROW_SSSE3
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// The following are also available on x64 Visual C.
|
// The following are also available on x64 Visual C.
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
|
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
|
||||||
(!defined(__clang__) || defined(__SSSE3__))
|
(!defined(__clang__) || defined(__SSSE3__))
|
||||||
@ -1464,6 +1459,12 @@ void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
|
|||||||
// Unattenuated planar alpha blend.
|
// Unattenuated planar alpha blend.
|
||||||
void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
|
void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
|
||||||
const uint8* alpha, uint8* dst, int width);
|
const uint8* alpha, uint8* dst, int width);
|
||||||
|
void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width);
|
||||||
|
void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width);
|
||||||
|
void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width);
|
||||||
void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
|
void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
|
||||||
const uint8* alpha, uint8* dst, int width);
|
const uint8* alpha, uint8* dst, int width);
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1547
|
#define LIBYUV_VERSION 1548
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -17,6 +17,7 @@
|
|||||||
#include "libyuv/mjpeg_decoder.h"
|
#include "libyuv/mjpeg_decoder.h"
|
||||||
#endif
|
#endif
|
||||||
#include "libyuv/row.h"
|
#include "libyuv/row.h"
|
||||||
|
#include "libyuv/scale_row.h" // for ScaleRowDown2
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
@ -577,6 +578,167 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Alpha Blend plane and store to destination.
|
||||||
|
LIBYUV_API
|
||||||
|
int BlendPlane(const uint8* src_y0, int src_stride_y0,
|
||||||
|
const uint8* src_y1, int src_stride_y1,
|
||||||
|
const uint8* alpha, int alpha_stride,
|
||||||
|
uint8* dst_y, int dst_stride_y,
|
||||||
|
int width, int height) {
|
||||||
|
int y;
|
||||||
|
void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
|
||||||
|
if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Negative height means invert the image.
|
||||||
|
if (height < 0) {
|
||||||
|
height = -height;
|
||||||
|
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||||
|
dst_stride_y = -dst_stride_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Coalesce rows for Y plane.
|
||||||
|
if (src_stride_y0 == width &&
|
||||||
|
src_stride_y1 == width &&
|
||||||
|
alpha_stride == width &&
|
||||||
|
dst_stride_y == width) {
|
||||||
|
width *= height;
|
||||||
|
height = 1;
|
||||||
|
src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(HAS_BLENDPLANEROW_SSSE3)
|
||||||
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
|
// TODO(fbarchard): Implement _Any_ versions for widths that are not a multiple of 8.
|
||||||
|
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
|
||||||
|
if (IS_ALIGNED(width, 8)) {
|
||||||
|
BlendPlaneRow = BlendPlaneRow_SSSE3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_BLENDPLANEROW_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
|
||||||
|
if (IS_ALIGNED(width, 16)) {
|
||||||
|
BlendPlaneRow = BlendPlaneRow_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (y = 0; y < height; ++y) {
|
||||||
|
BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
|
||||||
|
src_y0 += src_stride_y0;
|
||||||
|
src_y1 += src_stride_y1;
|
||||||
|
alpha += alpha_stride;
|
||||||
|
dst_y += dst_stride_y;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define MAXTWIDTH 2048
|
||||||
|
// Alpha Blend YUV images and store to destination.
|
||||||
|
LIBYUV_API
|
||||||
|
int I420Blend(const uint8* src_y0, int src_stride_y0,
|
||||||
|
const uint8* src_u0, int src_stride_u0,
|
||||||
|
const uint8* src_v0, int src_stride_v0,
|
||||||
|
const uint8* src_y1, int src_stride_y1,
|
||||||
|
const uint8* src_u1, int src_stride_u1,
|
||||||
|
const uint8* src_v1, int src_stride_v1,
|
||||||
|
const uint8* alpha, int alpha_stride,
|
||||||
|
uint8* dst_y, int dst_stride_y,
|
||||||
|
uint8* dst_u, int dst_stride_u,
|
||||||
|
uint8* dst_v, int dst_stride_v,
|
||||||
|
int width, int height) {
|
||||||
|
int y;
|
||||||
|
void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
|
||||||
|
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||||
|
uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
|
||||||
|
if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
|
||||||
|
!alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Negative height means invert the image.
|
||||||
|
if (height < 0) {
|
||||||
|
height = -height;
|
||||||
|
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||||
|
dst_stride_y = -dst_stride_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blend Y plane.
|
||||||
|
BlendPlane(src_y0, src_stride_y0,
|
||||||
|
src_y1, src_stride_y1,
|
||||||
|
alpha, alpha_stride,
|
||||||
|
dst_y, dst_stride_y,
|
||||||
|
width, height);
|
||||||
|
|
||||||
|
// Half width/height for UV.
|
||||||
|
width = (width + 1) >> 1;
|
||||||
|
height = (height + 1) >> 1;
|
||||||
|
|
||||||
|
#if defined(HAS_BLENDPLANEROW_SSSE3)
|
||||||
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
|
// TODO(fbarchard): Implement _Any_ versions for widths that are not a multiple of 8.
|
||||||
|
// BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
|
||||||
|
if (IS_ALIGNED(width, 8)) {
|
||||||
|
BlendPlaneRow = BlendPlaneRow_SSSE3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_BLENDPLANEROW_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
// BlendPlaneRow = BlendPlaneRow_Any_AVX2;
|
||||||
|
if (IS_ALIGNED(width, 16)) {
|
||||||
|
BlendPlaneRow = BlendPlaneRow_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_SCALEROWDOWN2_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
|
||||||
|
if (IS_ALIGNED(width, 16)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_NEON;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_SCALEROWDOWN2_SSE2)
|
||||||
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_Any_SSE2;
|
||||||
|
if (IS_ALIGNED(width, 16)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_SSE2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_SCALEROWDOWN2_AVX2)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
|
||||||
|
if (IS_ALIGNED(width, 32)) {
|
||||||
|
ScaleRowDown2 = ScaleRowDown2Box_AVX2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Row buffer for intermediate alpha pixels.
|
||||||
|
align_buffer_64(halfalpha, width);
|
||||||
|
for (y = 0; y < height; ++y) {
|
||||||
|
// Subsample 2 rows of alpha to 1 row of half width for blending U and V.
|
||||||
|
ScaleRowDown2(alpha, alpha_stride, halfalpha, width);
|
||||||
|
alpha += alpha_stride * 2;
|
||||||
|
BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, width);
|
||||||
|
BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, width);
|
||||||
|
src_u0 += src_stride_u0;
|
||||||
|
src_u1 += src_stride_u1;
|
||||||
|
dst_u += dst_stride_u;
|
||||||
|
src_v0 += src_stride_v0;
|
||||||
|
src_v1 += src_stride_v1;
|
||||||
|
dst_v += dst_stride_v;
|
||||||
|
}
|
||||||
|
free_aligned_buffer_64(halfalpha);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Multiply 2 ARGB images and store to destination.
|
// Multiply 2 ARGB images and store to destination.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
||||||
|
|||||||
@ -3467,7 +3467,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
|
|||||||
}
|
}
|
||||||
#endif // HAS_ARGBBLENDROW_SSSE3
|
#endif // HAS_ARGBBLENDROW_SSSE3
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAS_BLENDPLANEROW_SSSE3
|
#ifdef HAS_BLENDPLANEROW_SSSE3
|
||||||
// Blend 8 pixels at a time.
|
// Blend 8 pixels at a time.
|
||||||
// =((G2*C2)+(H2*(D2))+32768+127)/256
|
// =((G2*C2)+(H2*(D2))+32768+127)/256
|
||||||
@ -3514,6 +3513,56 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
|
|||||||
}
|
}
|
||||||
#endif // HAS_BLENDPLANEROW_SSSE3
|
#endif // HAS_BLENDPLANEROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_BLENDPLANEROW_AVX2
|
||||||
|
// Blend 16 pixels at a time.
|
||||||
|
// =((G2*C2)+(H2*(D2))+32768+127)/256
|
||||||
|
void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width) {
|
||||||
|
asm volatile (
|
||||||
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
"vpsllw $0x8,%%ymm5,%%ymm5 \n"
|
||||||
|
"mov $0x80808080,%%eax \n"
|
||||||
|
"vmovd %%eax,%%xmm6 \n"
|
||||||
|
"vbroadcastss %%xmm6,%%ymm6 \n"
|
||||||
|
"mov $0x807f807f,%%eax \n"
|
||||||
|
"vmovd %%eax,%%xmm7 \n"
|
||||||
|
"vbroadcastss %%xmm7,%%ymm7 \n"
|
||||||
|
"sub %2,%0 \n"
|
||||||
|
"sub %2,%1 \n"
|
||||||
|
"sub %2,%3 \n"
|
||||||
|
|
||||||
|
// 16 pixel loop.
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu (%2),%%xmm0 \n"
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpxor %%ymm5,%%ymm0,%%ymm0 \n"
|
||||||
|
"vmovdqu (%0,%2,1),%%xmm1 \n"
|
||||||
|
"vmovdqu (%1,%2,1),%%xmm2 \n"
|
||||||
|
"vpermq $0xd8,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
|
||||||
|
"vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpsubb %%ymm6,%%ymm1,%%ymm1 \n"
|
||||||
|
"vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
|
||||||
|
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
|
||||||
|
"vmovdqu %%xmm0,(%3,%2,1) \n"
|
||||||
|
"lea 0x10(%2),%2 \n"
|
||||||
|
"sub $0x10,%4 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src0), // %0
|
||||||
|
"+r"(src1), // %1
|
||||||
|
"+r"(alpha), // %2
|
||||||
|
"+r"(dst), // %3
|
||||||
|
"+r"(width) // %4
|
||||||
|
:: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#endif // HAS_BLENDPLANEROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
||||||
// Shuffle table duplicating alpha
|
// Shuffle table duplicating alpha
|
||||||
|
|||||||
@ -525,7 +525,7 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb,
|
|||||||
vmovd xmm5, eax
|
vmovd xmm5, eax
|
||||||
vbroadcastss ymm5, xmm5
|
vbroadcastss ymm5, xmm5
|
||||||
mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
|
mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits
|
||||||
movd xmm6, eax
|
vmovd xmm6, eax
|
||||||
vbroadcastss ymm6, xmm6
|
vbroadcastss ymm6, xmm6
|
||||||
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
|
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
|
||||||
vpsllw ymm3, ymm3, 11
|
vpsllw ymm3, ymm3, 11
|
||||||
@ -576,7 +576,7 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
|
|||||||
vmovd xmm5, eax
|
vmovd xmm5, eax
|
||||||
vbroadcastss ymm5, xmm5
|
vbroadcastss ymm5, xmm5
|
||||||
mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
|
mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits
|
||||||
movd xmm6, eax
|
vmovd xmm6, eax
|
||||||
vbroadcastss ymm6, xmm6
|
vbroadcastss ymm6, xmm6
|
||||||
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
|
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red
|
||||||
vpsllw ymm3, ymm3, 11
|
vpsllw ymm3, ymm3, 11
|
||||||
@ -4106,7 +4106,7 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
|
|||||||
movq qword ptr [edi + esi], xmm0
|
movq qword ptr [edi + esi], xmm0
|
||||||
lea esi, [esi + 8]
|
lea esi, [esi + 8]
|
||||||
sub ecx, 8
|
sub ecx, 8
|
||||||
jge convertloop8
|
jg convertloop8
|
||||||
|
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
@ -4115,6 +4115,62 @@ void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
|
|||||||
}
|
}
|
||||||
#endif // HAS_BLENDPLANEROW_SSSE3
|
#endif // HAS_BLENDPLANEROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_BLENDPLANEROW_AVX2
|
||||||
|
// Blend 16 pixels at a time.
|
||||||
|
// =((G2*C2)+(H2*(D2))+32768+127)/256
|
||||||
|
__declspec(naked)
|
||||||
|
void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
|
||||||
|
const uint8* alpha, uint8* dst, int width) {
|
||||||
|
__asm {
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00
|
||||||
|
vpsllw ymm5, ymm5, 8
|
||||||
|
mov eax, 0x80808080 // 128 for biasing image to signed.
|
||||||
|
vmovd xmm6, eax
|
||||||
|
vbroadcastss ymm6, xmm6
|
||||||
|
mov eax, 0x807f807f // 32768 + 127 for unbias and round.
|
||||||
|
vmovd xmm7, eax
|
||||||
|
vbroadcastss ymm7, xmm7
|
||||||
|
mov eax, [esp + 8 + 4] // src0
|
||||||
|
mov edx, [esp + 8 + 8] // src1
|
||||||
|
mov esi, [esp + 8 + 12] // alpha
|
||||||
|
mov edi, [esp + 8 + 16] // dst
|
||||||
|
mov ecx, [esp + 8 + 20] // width
|
||||||
|
sub eax, esi
|
||||||
|
sub edx, esi
|
||||||
|
sub edi, esi
|
||||||
|
|
||||||
|
// 16 pixel loop.
|
||||||
|
convertloop16:
|
||||||
|
vmovdqu xmm0, [esi] // alpha
|
||||||
|
vpermq ymm0, ymm0, 0xd8
|
||||||
|
vpunpcklbw ymm0, ymm0, ymm0
|
||||||
|
vpxor ymm0, ymm0, ymm5 // a, 255-a
|
||||||
|
vmovdqu xmm1, [eax + esi] // src0
|
||||||
|
vmovdqu xmm2, [edx + esi] // src1
|
||||||
|
vpermq ymm1, ymm1, 0xd8
|
||||||
|
vpermq ymm2, ymm2, 0xd8
|
||||||
|
vpunpcklbw ymm1, ymm1, ymm2
|
||||||
|
vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128
|
||||||
|
vpmaddubsw ymm0, ymm0, ymm1
|
||||||
|
vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round.
|
||||||
|
vpsrlw ymm0, ymm0, 8
|
||||||
|
vpackuswb ymm0, ymm0, ymm0
|
||||||
|
vpermq ymm0, ymm0, 0xd8
|
||||||
|
vmovdqu [edi + esi], xmm0
|
||||||
|
lea esi, [esi + 16]
|
||||||
|
sub ecx, 16
|
||||||
|
jg convertloop16
|
||||||
|
|
||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
vzeroupper
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // HAS_BLENDPLANEROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_ARGBBLENDROW_SSSE3
|
#ifdef HAS_ARGBBLENDROW_SSSE3
|
||||||
// Shuffle table for isolating alpha.
|
// Shuffle table for isolating alpha.
|
||||||
static const uvec8 kShuffleAlpha = {
|
static const uvec8 kShuffleAlpha = {
|
||||||
|
|||||||
@ -1163,16 +1163,14 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
|
|||||||
EXPECT_LE(max_diff, 1);
|
EXPECT_LE(max_diff, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_BLENDPLANEROW_SSSE3
|
#ifdef HAS_BLENDPLANEROW_AVX2
|
||||||
// TODO(fbarchard): Switch to I420Blend.
|
// TODO(fbarchard): Switch to I420Blend.
|
||||||
static void TestBlendPlane(int width, int height, int benchmark_iterations,
|
static void TestBlendPlaneRow(int width, int height, int benchmark_iterations,
|
||||||
int invert, int off) {
|
int invert, int off) {
|
||||||
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
|
||||||
|
int has_avx2 = TestCpuFlag(kCpuHasAVX2);
|
||||||
width = width * height;
|
width = width * height;
|
||||||
height = 1;
|
height = 1;
|
||||||
if (width < 1) {
|
|
||||||
width = 1;
|
|
||||||
}
|
|
||||||
if (width < 256) {
|
if (width < 256) {
|
||||||
width = 256;
|
width = 256;
|
||||||
}
|
}
|
||||||
@ -1181,23 +1179,39 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
|
|||||||
align_buffer_64(src_argb_a, kStride * height + off);
|
align_buffer_64(src_argb_a, kStride * height + off);
|
||||||
align_buffer_64(src_argb_b, kStride * height + off);
|
align_buffer_64(src_argb_b, kStride * height + off);
|
||||||
align_buffer_64(src_argb_alpha, kStride * height + off);
|
align_buffer_64(src_argb_alpha, kStride * height + off);
|
||||||
align_buffer_64(dst_argb_c, kStride * height);
|
align_buffer_64(dst_argb_c, kStride * height + off);
|
||||||
align_buffer_64(dst_argb_opt, kStride * height);
|
align_buffer_64(dst_argb_opt, kStride * height + off);
|
||||||
|
memset(dst_argb_c, 255, kStride * height + off);
|
||||||
|
memset(dst_argb_opt, 255, kStride * height + off);
|
||||||
|
|
||||||
if (has_ssse3) {
|
if (has_ssse3) {
|
||||||
for (int i = 0; i < 255; ++i) {
|
// Test source is maintained exactly if alpha is 255.
|
||||||
src_argb_a[i] = i;
|
for (int i = 0; i < 256; ++i) {
|
||||||
src_argb_b[i] = 255 - i;
|
src_argb_a[i + off] = i;
|
||||||
src_argb_alpha[i] = 255;
|
src_argb_b[i + off] = 255 - i;
|
||||||
|
src_argb_alpha[i + off] = 255;
|
||||||
}
|
}
|
||||||
memset(dst_argb_opt, 0xfb, kStride * height);
|
|
||||||
BlendPlaneRow_SSSE3(src_argb_a + off,
|
BlendPlaneRow_SSSE3(src_argb_a + off,
|
||||||
src_argb_b + off,
|
src_argb_b + off,
|
||||||
src_argb_alpha + off,
|
src_argb_alpha + off,
|
||||||
dst_argb_opt,
|
dst_argb_opt + off,
|
||||||
width * height);
|
256);
|
||||||
for (int i = 0; i < kStride * height; ++i) {
|
for (int i = 0; i < 256; ++i) {
|
||||||
EXPECT_EQ(src_argb_a[i], dst_argb_opt[i]);
|
EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
|
||||||
|
}
|
||||||
|
// Test destination is maintained exactly if alpha is 0.
|
||||||
|
for (int i = 0; i < 256; ++i) {
|
||||||
|
src_argb_a[i + off] = i;
|
||||||
|
src_argb_b[i + off] = 255 - i;
|
||||||
|
src_argb_alpha[i + off] = 0;
|
||||||
|
}
|
||||||
|
BlendPlaneRow_SSSE3(src_argb_a + off,
|
||||||
|
src_argb_b + off,
|
||||||
|
src_argb_alpha + off,
|
||||||
|
dst_argb_opt + off,
|
||||||
|
256);
|
||||||
|
for (int i = 0; i < 256; ++i) {
|
||||||
|
EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kStride * height; ++i) {
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
@ -1205,34 +1219,122 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
|
|||||||
src_argb_b[i + off] = (fastrand() & 0xff);
|
src_argb_b[i + off] = (fastrand() & 0xff);
|
||||||
src_argb_alpha[i + off] = (fastrand() & 0xff);
|
src_argb_alpha[i + off] = (fastrand() & 0xff);
|
||||||
}
|
}
|
||||||
memset(dst_argb_c, 255, kStride * height);
|
|
||||||
memset(dst_argb_opt, 255, kStride * height);
|
|
||||||
|
|
||||||
BlendPlaneRow_C(src_argb_a + off,
|
BlendPlaneRow_C(src_argb_a + off,
|
||||||
src_argb_b + off,
|
src_argb_b + off,
|
||||||
src_argb_alpha + off,
|
src_argb_alpha + off,
|
||||||
dst_argb_c,
|
dst_argb_c + off,
|
||||||
width * height);
|
width * height);
|
||||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||||
if (has_ssse3) {
|
if (has_avx2) {
|
||||||
BlendPlaneRow_SSSE3(src_argb_a + off,
|
BlendPlaneRow_AVX2(src_argb_a + off,
|
||||||
src_argb_b + off,
|
src_argb_b + off,
|
||||||
src_argb_alpha + off,
|
src_argb_alpha + off,
|
||||||
dst_argb_opt,
|
dst_argb_opt + off,
|
||||||
width * height);
|
width * height);
|
||||||
} else {
|
} else {
|
||||||
BlendPlaneRow_C(src_argb_a + off,
|
if (has_ssse3) {
|
||||||
src_argb_b + off,
|
BlendPlaneRow_SSSE3(src_argb_a + off,
|
||||||
src_argb_alpha + off,
|
src_argb_b + off,
|
||||||
dst_argb_opt,
|
src_argb_alpha + off,
|
||||||
width * height);
|
dst_argb_opt + off,
|
||||||
|
width * height);
|
||||||
|
} else {
|
||||||
|
BlendPlaneRow_C(src_argb_a + off,
|
||||||
|
src_argb_b + off,
|
||||||
|
src_argb_alpha + off,
|
||||||
|
dst_argb_opt + off,
|
||||||
|
width * height);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < kStride * height; ++i) {
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);
|
EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
|
||||||
}
|
}
|
||||||
free_aligned_buffer_64(src_argb_a);
|
free_aligned_buffer_64(src_argb_a);
|
||||||
free_aligned_buffer_64(src_argb_b);
|
free_aligned_buffer_64(src_argb_b);
|
||||||
|
free_aligned_buffer_64(src_argb_alpha);
|
||||||
|
free_aligned_buffer_64(dst_argb_c);
|
||||||
|
free_aligned_buffer_64(dst_argb_opt);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, BlendPlaneRow_Opt) {
|
||||||
|
TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
|
+1, 0);
|
||||||
|
}
|
||||||
|
TEST_F(LibYUVPlanarTest, BlendPlaneRow_Unaligned) {
|
||||||
|
TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
|
+1, 1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void TestBlendPlane(int width, int height, int benchmark_iterations,
|
||||||
|
int disable_cpu_flags, int benchmark_cpu_info,
|
||||||
|
int invert, int off) {
|
||||||
|
if (width < 1) {
|
||||||
|
width = 1;
|
||||||
|
}
|
||||||
|
const int kBpp = 1;
|
||||||
|
const int kStride = width * kBpp;
|
||||||
|
align_buffer_64(src_argb_a, kStride * height + off);
|
||||||
|
align_buffer_64(src_argb_b, kStride * height + off);
|
||||||
|
align_buffer_64(src_argb_alpha, kStride * height + off);
|
||||||
|
align_buffer_64(dst_argb_c, kStride * height + off);
|
||||||
|
align_buffer_64(dst_argb_opt, kStride * height + off);
|
||||||
|
memset(dst_argb_c, 255, kStride * height + off);
|
||||||
|
memset(dst_argb_opt, 255, kStride * height + off);
|
||||||
|
|
||||||
|
// Test source is maintained exactly if alpha is 255.
|
||||||
|
for (int i = 0; i < width; ++i) {
|
||||||
|
src_argb_a[i + off] = i & 255;
|
||||||
|
src_argb_b[i + off] = 255 - (i & 255);
|
||||||
|
}
|
||||||
|
memset(src_argb_alpha + off, 255, width);
|
||||||
|
BlendPlane(src_argb_a + off, width,
|
||||||
|
src_argb_b + off, width,
|
||||||
|
src_argb_alpha + off, width,
|
||||||
|
dst_argb_opt + off, width,
|
||||||
|
width, 1);
|
||||||
|
for (int i = 0; i < width; ++i) {
|
||||||
|
EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
|
||||||
|
}
|
||||||
|
// Test destination is maintained exactly if alpha is 0.
|
||||||
|
memset(src_argb_alpha + off, 0, width);
|
||||||
|
BlendPlane(src_argb_a + off, width,
|
||||||
|
src_argb_b + off, width,
|
||||||
|
src_argb_alpha + off, width,
|
||||||
|
dst_argb_opt + off, width,
|
||||||
|
width, 1);
|
||||||
|
for (int i = 0; i < width; ++i) {
|
||||||
|
EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
|
src_argb_a[i + off] = (fastrand() & 0xff);
|
||||||
|
src_argb_b[i + off] = (fastrand() & 0xff);
|
||||||
|
src_argb_alpha[i + off] = (fastrand() & 0xff);
|
||||||
|
}
|
||||||
|
|
||||||
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
|
BlendPlane(src_argb_a + off, width,
|
||||||
|
src_argb_b + off, width,
|
||||||
|
src_argb_alpha + off, width,
|
||||||
|
dst_argb_c + off, width,
|
||||||
|
width, height);
|
||||||
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
|
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||||
|
BlendPlane(src_argb_a + off, width,
|
||||||
|
src_argb_b + off, width,
|
||||||
|
src_argb_alpha + off, width,
|
||||||
|
dst_argb_opt + off, width,
|
||||||
|
width, height);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < kStride * height; ++i) {
|
||||||
|
EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
|
||||||
|
}
|
||||||
|
free_aligned_buffer_64(src_argb_a);
|
||||||
|
free_aligned_buffer_64(src_argb_b);
|
||||||
|
free_aligned_buffer_64(src_argb_alpha);
|
||||||
free_aligned_buffer_64(dst_argb_c);
|
free_aligned_buffer_64(dst_argb_c);
|
||||||
free_aligned_buffer_64(dst_argb_opt);
|
free_aligned_buffer_64(dst_argb_opt);
|
||||||
return;
|
return;
|
||||||
@ -1240,9 +1342,106 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
|
|||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
|
||||||
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
+1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||||
|
}
|
||||||
|
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
|
||||||
|
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
|
||||||
|
|
||||||
|
static void TestI420Blend(int width, int height, int benchmark_iterations,
|
||||||
|
int disable_cpu_flags, int benchmark_cpu_info,
|
||||||
|
int invert, int off) {
|
||||||
|
width = ((width) > 0) ? (width) : 1;
|
||||||
|
const int kStrideUV = SUBSAMPLE(width, 2);
|
||||||
|
const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
|
||||||
|
align_buffer_64(src_y0, width * height + off);
|
||||||
|
align_buffer_64(src_u0, kSizeUV + off);
|
||||||
|
align_buffer_64(src_v0, kSizeUV + off);
|
||||||
|
align_buffer_64(src_y1, width * height + off);
|
||||||
|
align_buffer_64(src_u1, kSizeUV + off);
|
||||||
|
align_buffer_64(src_v1, kSizeUV + off);
|
||||||
|
align_buffer_64(src_a, width * height + off);
|
||||||
|
align_buffer_64(dst_y_c, width * height + off);
|
||||||
|
align_buffer_64(dst_u_c, kSizeUV + off);
|
||||||
|
align_buffer_64(dst_v_c, kSizeUV + off);
|
||||||
|
align_buffer_64(dst_y_opt, width * height + off);
|
||||||
|
align_buffer_64(dst_u_opt, kSizeUV + off);
|
||||||
|
align_buffer_64(dst_v_opt, kSizeUV + off);
|
||||||
|
|
||||||
|
MemRandomize(src_y0, width * height + off);
|
||||||
|
MemRandomize(src_u0, kSizeUV + off);
|
||||||
|
MemRandomize(src_v0, kSizeUV + off);
|
||||||
|
MemRandomize(src_y1, width * height + off);
|
||||||
|
MemRandomize(src_u1, kSizeUV + off);
|
||||||
|
MemRandomize(src_v1, kSizeUV + off);
|
||||||
|
MemRandomize(src_a, width * height + off);
|
||||||
|
memset(dst_y_c, 255, width * height + off);
|
||||||
|
memset(dst_u_c, 255, kSizeUV + off);
|
||||||
|
memset(dst_v_c, 255, kSizeUV + off);
|
||||||
|
memset(dst_y_opt, 255, width * height + off);
|
||||||
|
memset(dst_u_opt, 255, kSizeUV + off);
|
||||||
|
memset(dst_v_opt, 255, kSizeUV + off);
|
||||||
|
|
||||||
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
|
I420Blend(src_y0 + off, width,
|
||||||
|
src_u0 + off, kStrideUV,
|
||||||
|
src_v0 + off, kStrideUV,
|
||||||
|
src_y1 + off, width,
|
||||||
|
src_u1 + off, kStrideUV,
|
||||||
|
src_v1 + off, kStrideUV,
|
||||||
|
src_a + off, width,
|
||||||
|
dst_y_c + off, width,
|
||||||
|
dst_u_c + off, kStrideUV,
|
||||||
|
dst_v_c + off, kStrideUV,
|
||||||
|
width, height);
|
||||||
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
|
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||||
|
I420Blend(src_y0 + off, width,
|
||||||
|
src_u0 + off, kStrideUV,
|
||||||
|
src_v0 + off, kStrideUV,
|
||||||
|
src_y1 + off, width,
|
||||||
|
src_u1 + off, kStrideUV,
|
||||||
|
src_v1 + off, kStrideUV,
|
||||||
|
src_a + off, width,
|
||||||
|
dst_y_opt + off, width,
|
||||||
|
dst_u_opt + off, kStrideUV,
|
||||||
|
dst_v_opt + off, kStrideUV,
|
||||||
|
width, height);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < width * height; ++i) {
|
||||||
|
EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < kSizeUV; ++i) {
|
||||||
|
EXPECT_NEAR(dst_u_c[i + off], dst_u_opt[i + off], 1); // Subsample off by 1
|
||||||
|
EXPECT_NEAR(dst_v_c[i + off], dst_v_opt[i + off], 1);
|
||||||
|
}
|
||||||
|
free_aligned_buffer_64(src_y0);
|
||||||
|
free_aligned_buffer_64(src_u0);
|
||||||
|
free_aligned_buffer_64(src_v0);
|
||||||
|
free_aligned_buffer_64(src_y1);
|
||||||
|
free_aligned_buffer_64(src_u1);
|
||||||
|
free_aligned_buffer_64(src_v1);
|
||||||
|
free_aligned_buffer_64(src_a);
|
||||||
|
free_aligned_buffer_64(dst_y_c);
|
||||||
|
free_aligned_buffer_64(dst_u_c);
|
||||||
|
free_aligned_buffer_64(dst_v_c);
|
||||||
|
free_aligned_buffer_64(dst_y_opt);
|
||||||
|
free_aligned_buffer_64(dst_u_opt);
|
||||||
|
free_aligned_buffer_64(dst_v_opt);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
|
||||||
|
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||||
|
}
|
||||||
|
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
|
||||||
|
TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestAffine) {
|
TEST_F(LibYUVPlanarTest, TestAffine) {
|
||||||
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
|
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user