mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
YUVToARGB, YUVToBGRA, YUVToABGR optimized.
Review URL: http://webrtc-codereview.appspot.com/267022 git-svn-id: http://libyuv.googlecode.com/svn/trunk@86 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
bc8f28eb52
commit
93d003f874
@ -62,6 +62,7 @@
|
||||
['arm_neon==1', {
|
||||
'sources' : [
|
||||
'source/rotate_neon.cc',
|
||||
'source/row_neon.cc',
|
||||
],
|
||||
}],
|
||||
],
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
# sources
|
||||
'unit_test/compare_test.cc',
|
||||
'unit_test/planar_test.cc',
|
||||
'unit_test/rotate_test.cc',
|
||||
'unit_test/scale_test.cc',
|
||||
'unit_test/unit_test.cc',
|
||||
|
||||
@ -1188,7 +1188,11 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && (width % 16 == 0)) {
|
||||
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
(width % 8 == 0) &&
|
||||
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
||||
@ -1227,7 +1231,11 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_SSSE3)
|
||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && (width % 16 == 0)) {
|
||||
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOBGRAROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
(width % 8 == 0) &&
|
||||
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
||||
@ -1266,7 +1274,11 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width);
|
||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_SSSE3)
|
||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && (width % 16 == 0)) {
|
||||
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_NEON;
|
||||
} else
|
||||
#elif defined(HAS_FASTCONVERTYUVTOABGRROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) &&
|
||||
(width % 8 == 0) &&
|
||||
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
|
||||
|
||||
21
source/row.h
21
source/row.h
@ -19,6 +19,27 @@
|
||||
#define YUV_DISABLE_ASM
|
||||
#endif
|
||||
|
||||
// NEON row converters. They are declared only when the compiler targets ARM
// NEON (__ARM_NEON__) and assembly has not been turned off with
// YUV_DISABLE_ASM.
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)

// Feature macros: code that selects a row function checks these before
// referring to the matching _NEON symbol.
#define HAS_FASTCONVERTYUVTOARGBROW_NEON
#define HAS_FASTCONVERTYUVTOBGRAROW_NEON
#define HAS_FASTCONVERTYUVTOABGRROW_NEON

// All three converters share one signature: Y, U and V source row pointers,
// a destination row pointer, and the number of pixels to convert.
void FastConvertYUVToARGBRow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width);

void FastConvertYUVToBGRARow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width);

void FastConvertYUVToABGRRow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width);

#endif  // __ARM_NEON__ && !YUV_DISABLE_ASM
|
||||
|
||||
// The following are available on all x86 platforms
|
||||
#if (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
!defined(YUV_DISABLE_ASM)
|
||||
|
||||
156
source/row_neon.cc
Normal file
156
source/row_neon.cc
Normal file
@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "row.h"
|
||||
|
||||
// YUVTORGB is an inline-assembly fragment (a run of adjacent string literals)
// that converts 8 pixels each time it is executed.
//   Operands:  %0 = Y pointer (8 bytes read), %1 = U pointer (4 bytes read),
//              %2 = V pointer (4 bytes read); all three are post-incremented.
//   On entry:  d24 = U/V multipliers for the B and R terms, d25 = U/V
//              multipliers for the G term, d26 = 128 in every byte,
//              q14 = luma scale, q15 = luma offset.
//   On exit:   d20 = U-derived channel (B), d22 = V-derived channel (R) and
//              d16 = G, each holding 8 bytes in pixel order.
//   Overwrites q0, d2, q8, q9, q10 and q11.
#define YUVTORGB                                                             \
  /* Load 8 Y bytes into d0; 4 U bytes and 4 V bytes share d2. */            \
  "vld1.u8 {d0}, [%0]! \n"                                                   \
  "vld1.u32 {d2[0]}, [%1]! \n"                                               \
  "vld1.u32 {d2[1]}, [%2]! \n"                                               \
  /* XOR with 128 (d26) re-biases chroma to signed: value - 128. */          \
  "veor.u8 d2, d26 \n"                                                       \
  /* d16 = U products (B term), d17 = V products (R term). */                \
  "vmull.s8 q8, d2, d24 \n"                                                  \
  /* d18 = U products, d19 = V products: the two halves of the G term. */    \
  "vmull.s8 q9, d2, d25 \n"                                                  \
  /* Zero-extend Y to 16 bits: even pixels end up in d0, odd ones in d1. */  \
  "vmov.u8 d1, #0 \n"                                                        \
  "vtrn.u8 d0, d1 \n"                                                        \
  /* Luma term: (Y - q15) * q14. */                                          \
  "vsub.s16 q0, q0, q15 \n"                                                  \
  "vmul.s16 q0, q0, q14 \n"                                                  \
  /* Fold the U and V contributions to G into d18. */                        \
  "vadd.s16 d18, d19 \n"                                                     \
  /* Saturating luma + chroma; each chroma lane feeds one even/odd pair. */  \
  /* q10 = B and q11 = R are formed first because q8 is reused for G. */     \
  "vqadd.s16 d20, d0, d16 \n"                                                \
  "vqadd.s16 d21, d1, d16 \n"                                                \
  "vqadd.s16 d22, d0, d17 \n"                                                \
  "vqadd.s16 d23, d1, d17 \n"                                                \
  "vqadd.s16 d16, d0, d18 \n"                                                \
  "vqadd.s16 d17, d1, d18 \n"                                                \
  /* Rounding shift right by 6, narrowed with unsigned saturation. */        \
  "vqrshrun.s16 d0, q10, #6 \n"                                              \
  "vqrshrun.s16 d1, q11, #6 \n"                                              \
  "vqrshrun.s16 d2, q8, #6 \n"                                               \
  /* Widen back to 16 bits so even and odd results sit in separate halves. */\
  "vmovl.u8 q10, d0 \n"                                                      \
  "vmovl.u8 q11, d1 \n"                                                      \
  "vmovl.u8 q8, d2 \n"                                                       \
  /* Transpose bytes: even/odd results are merged back into pixel order. */  \
  "vtrn.u8 d20, d21 \n"                                                      \
  "vtrn.u8 d22, d23 \n"                                                      \
  "vtrn.u8 d16, d17 \n"
|
||||
|
||||
// Chroma multipliers shared by the NEON row converters in this file, which
// load kUVToRB into d24 and kUVToG into d25. In both tables the first four
// entries are multiplied with U samples and the last four with V samples.
// NOTE(review): the converters load 8 bytes from each table, so this assumes
// vec8 is a one-byte signed element type - confirm against row.h.
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) || \
    defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON) || \
    defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
static const vec8 kUVToRB[8] = {
  127, 127, 127, 127,  // applied to U
  102, 102, 102, 102   // applied to V
};
static const vec8 kUVToG[8] = {
  -25, -25, -25, -25,  // applied to U
  -52, -52, -52, -52   // applied to V
};
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
// Converts one row of planar YUV to 32-bit pixels using NEON.
// Every loop pass runs YUVTORGB (8 pixels) and stores the channels
// interleaved in memory as B, G, R, 255, i.e. 32 bytes per pass.
//   y_buf:        luma row, 8 bytes consumed per pass.
//   u_buf, v_buf: chroma rows, 4 bytes each consumed per pass.
//   rgb_buf:      destination row.
//   width:        pixel count. The body runs before the count is tested and
//                 the count steps by 8, so pass a positive multiple of 8.
void FastConvertYUVToARGBRow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width) {
  asm volatile (
    // Constants that stay live for the whole loop.
    "vld1.u8 {d24}, [%5] \n"  // d24 = kUVToRB
    "vld1.u8 {d25}, [%6] \n"  // d25 = kUVToG
    "vmov.u8 d26, #128 \n"    // chroma bias
    "vmov.u16 q14, #74 \n"    // luma scale
    "vmov.u16 q15, #16 \n"    // luma offset
  "1: \n"
    YUVTORGB
    "vmov.u8 d21, d16 \n"     // G sits between B (d20) and R (d22)
    "vmov.u8 d23, #255 \n"    // constant alpha
    "vst4.u8 {d20, d21, d22, d23}, [%3]! \n"
    "subs %4, %4, #8 \n"      // 8 pixels finished
    "bhi 1b \n"
    : "+r"(y_buf),    // %0
      "+r"(u_buf),    // %1
      "+r"(v_buf),    // %2
      "+r"(rgb_buf),  // %3
      "+r"(width)     // %4
    : "r"(kUVToRB),   // %5
      "r"(kUVToG)     // %6
    : "cc", "memory",
      "q0", "q1", "q2", "q3", "q8", "q9",
      "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON)
// Converts one row of planar YUV to 32-bit pixels using NEON.
// Every loop pass runs YUVTORGB (8 pixels) and stores the channels
// interleaved in memory as 255, R, G, B, i.e. 32 bytes per pass.
//   y_buf:        luma row, 8 bytes consumed per pass.
//   u_buf, v_buf: chroma rows, 4 bytes each consumed per pass.
//   rgb_buf:      destination row.
//   width:        pixel count. The body runs before the count is tested and
//                 the count steps by 8, so pass a positive multiple of 8.
void FastConvertYUVToBGRARow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width) {
  asm volatile (
    // Constants that stay live for the whole loop.
    "vld1.u8 {d24}, [%5] \n"  // d24 = kUVToRB
    "vld1.u8 {d25}, [%6] \n"  // d25 = kUVToG
    "vmov.u8 d26, #128 \n"    // chroma bias
    "vmov.u16 q14, #74 \n"    // luma scale
    "vmov.u16 q15, #16 \n"    // luma offset
  "1: \n"
    YUVTORGB
    "vswp.u8 d20, d22 \n"     // d20 = R, d22 = B
    "vmov.u8 d21, d16 \n"     // G between them
    "vmov.u8 d19, #255 \n"    // constant alpha, stored first
    "vst4.u8 {d19, d20, d21, d22}, [%3]! \n"
    "subs %4, %4, #8 \n"      // 8 pixels finished
    "bhi 1b \n"
    : "+r"(y_buf),    // %0
      "+r"(u_buf),    // %1
      "+r"(v_buf),    // %2
      "+r"(rgb_buf),  // %3
      "+r"(width)     // %4
    : "r"(kUVToRB),   // %5
      "r"(kUVToG)     // %6
    : "cc", "memory",
      "q0", "q1", "q2", "q3", "q8", "q9",
      "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif
|
||||
|
||||
#if defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
// Converts one row of planar YUV to 32-bit pixels using NEON.
// Every loop pass runs YUVTORGB (8 pixels) and stores the channels
// interleaved in memory as R, G, B, 255, i.e. 32 bytes per pass.
//   y_buf:        luma row, 8 bytes consumed per pass.
//   u_buf, v_buf: chroma rows, 4 bytes each consumed per pass.
//   rgb_buf:      destination row.
//   width:        pixel count. The body runs before the count is tested and
//                 the count steps by 8, so pass a positive multiple of 8.
void FastConvertYUVToABGRRow_NEON(const uint8* y_buf, const uint8* u_buf,
                                  const uint8* v_buf, uint8* rgb_buf,
                                  int width) {
  asm volatile (
    // Constants that stay live for the whole loop.
    "vld1.u8 {d24}, [%5] \n"  // d24 = kUVToRB
    "vld1.u8 {d25}, [%6] \n"  // d25 = kUVToG
    "vmov.u8 d26, #128 \n"    // chroma bias
    "vmov.u16 q14, #74 \n"    // luma scale
    "vmov.u16 q15, #16 \n"    // luma offset
  "1: \n"
    YUVTORGB
    "vswp.u8 d20, d22 \n"     // d20 = R, d22 = B
    "vmov.u8 d21, d16 \n"     // G between them
    "vmov.u8 d23, #255 \n"    // constant alpha, stored last
    "vst4.u8 {d20, d21, d22, d23}, [%3]! \n"
    "subs %4, %4, #8 \n"      // 8 pixels finished
    "bhi 1b \n"
    : "+r"(y_buf),    // %0
      "+r"(u_buf),    // %1
      "+r"(v_buf),    // %2
      "+r"(rgb_buf),  // %3
      "+r"(width)     // %4
    : "r"(kUVToRB),   // %5
      "r"(kUVToG)     // %6
    : "cc", "memory",
      "q0", "q1", "q2", "q3", "q8", "q9",
      "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif
|
||||
118
unit_test/planar_test.cc
Normal file
118
unit_test/planar_test.cc
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "unit_test.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "libyuv/rotate.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
// Runs I420ToARGB _benchmark_iterations times on a zero-filled frame while
// the CPU flags are masked down to kCpuInitialized, then restores the flags.
// NOTE(review): presumably that mask leaves no SIMD feature bit set, so the
// C row functions get selected - confirm against cpu_id.
TEST_F(libyuvTest, BenchmarkI420ToARGB_C) {
  align_buffer_16(src_y, _benchmark_width * _benchmark_height);
  align_buffer_16(src_u, ((_benchmark_width * _benchmark_height) >> 2));
  align_buffer_16(src_v, ((_benchmark_width * _benchmark_height) >> 2));
  align_buffer_16(dst_argb, ((_benchmark_width << 2) * _benchmark_height));

  MaskCpuFlags(kCpuInitialized);

  for (int pass = 0; pass < _benchmark_iterations; ++pass) {
    I420ToARGB(src_y, _benchmark_width,
               src_u, _benchmark_width >> 1,
               src_v, _benchmark_width >> 1,
               dst_argb, _benchmark_width << 2,
               _benchmark_width, _benchmark_height);
  }

  MaskCpuFlags(-1);

  // Placeholder expectation: this test only exercises the code path.
  EXPECT_EQ(0, 0);

  free_aligned_buffer_16(src_y)
  free_aligned_buffer_16(src_u)
  free_aligned_buffer_16(src_v)
  free_aligned_buffer_16(dst_argb)
}
|
||||
|
||||
// Runs I420ToARGB _benchmark_iterations times on a zero-filled frame without
// touching the CPU flag mask. It makes no assertions.
TEST_F(libyuvTest, BenchmarkI420ToARGB_OPT) {
  align_buffer_16(src_y, _benchmark_width * _benchmark_height);
  align_buffer_16(src_u, (_benchmark_width * _benchmark_height) >> 2);
  align_buffer_16(src_v, (_benchmark_width * _benchmark_height) >> 2);
  align_buffer_16(dst_argb, (_benchmark_width << 2) * _benchmark_height);

  for (int pass = 0; pass < _benchmark_iterations; ++pass) {
    I420ToARGB(src_y, _benchmark_width,
               src_u, _benchmark_width >> 1,
               src_v, _benchmark_width >> 1,
               dst_argb, _benchmark_width << 2,
               _benchmark_width, _benchmark_height);
  }

  free_aligned_buffer_16(src_y)
  free_aligned_buffer_16(src_u)
  free_aligned_buffer_16(src_v)
  free_aligned_buffer_16(dst_argb)
}
|
||||
|
||||
// Generates a test named I420To<FMT>_CvsOPT. It fills a 1280x720 I420 frame
// with random bytes, converts it once with the CPU flags masked down to
// kCpuInitialized and once with every flag enabled, and expects each output
// byte of the two results to differ by at most 2.
// Rows are addressed by their stride (the plane width in bytes), so the whole
// of every plane is randomised and the whole of both outputs is compared.
#define TESTI420TO(FMT)                                                      \
TEST_F(libyuvTest, I420To##FMT##_CvsOPT) {                                   \
  const int src_width = 1280;                                                \
  const int src_height = 720;                                                \
  const int uv_width = src_width >> 1;                                       \
  const int uv_height = src_height >> 1;                                     \
  const int dst_stride = src_width << 2;  /* 4 output bytes per pixel */     \
  align_buffer_16(src_y, src_width * src_height);                            \
  align_buffer_16(src_u, (src_width * src_height) >> 2);                     \
  align_buffer_16(src_v, (src_width * src_height) >> 2);                     \
  align_buffer_16(dst_rgb_c, (src_width << 2) * src_height);                 \
  align_buffer_16(dst_rgb_opt, (src_width << 2) * src_height);               \
  srandom(time(NULL));                                                       \
  /* Random luma plane. */                                                   \
  for (int i = 0; i < src_height; ++i) {                                     \
    for (int j = 0; j < src_width; ++j) {                                    \
      src_y[(i * src_width) + j] = (random() & 0xff);                        \
    }                                                                        \
  }                                                                          \
  /* Random chroma planes, half resolution in both directions. */            \
  for (int i = 0; i < uv_height; ++i) {                                      \
    for (int j = 0; j < uv_width; ++j) {                                     \
      src_u[(i * uv_width) + j] = (random() & 0xff);                         \
      src_v[(i * uv_width) + j] = (random() & 0xff);                         \
    }                                                                        \
  }                                                                          \
  /* Reference conversion with the CPU feature mask applied. */              \
  MaskCpuFlags(kCpuInitialized);                                             \
  I420To##FMT(src_y, src_width,                                              \
              src_u, uv_width,                                               \
              src_v, uv_width,                                               \
              dst_rgb_c, dst_stride,                                         \
              src_width, src_height);                                        \
  /* Same conversion with every CPU flag enabled again. */                   \
  MaskCpuFlags(-1);                                                          \
  I420To##FMT(src_y, src_width,                                              \
              src_u, uv_width,                                               \
              src_v, uv_width,                                               \
              dst_rgb_opt, dst_stride,                                       \
              src_width, src_height);                                        \
  /* Count output bytes whose two results differ by more than 2. */          \
  int err = 0;                                                               \
  for (int i = 0; i < src_height; ++i) {                                     \
    for (int j = 0; j < dst_stride; ++j) {                                   \
      const int diff = (int)(dst_rgb_c[i * dst_stride + j]) -                \
                       (int)(dst_rgb_opt[i * dst_stride + j]);               \
      if (abs(diff) > 2) {                                                   \
        err++;                                                               \
      }                                                                      \
    }                                                                        \
  }                                                                          \
  EXPECT_EQ(err, 0);                                                         \
  free_aligned_buffer_16(src_y)                                              \
  free_aligned_buffer_16(src_u)                                              \
  free_aligned_buffer_16(src_v)                                              \
  free_aligned_buffer_16(dst_rgb_c)                                          \
  free_aligned_buffer_16(dst_rgb_opt)                                        \
}
|
||||
|
||||
TESTI420TO(ARGB)
|
||||
TESTI420TO(BGRA)
|
||||
TESTI420TO(ABGR)
|
||||
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// Declares two pointers in the current scope:
//   var##_mem - the zero-initialised block returned by calloc, sized
//               (size) + 15 bytes so there is room to round up;
//   var       - var##_mem rounded up to the next 16-byte boundary.
// `size` is parenthesised so that expressions such as `n >> 2` are added to
// the padding as a whole. The buffer is allocated exactly once.
#define align_buffer_16(var, size) \
  uint8* var; \
  uint8* var##_mem; \
  var##_mem = reinterpret_cast<uint8*>(calloc((size) + 15, sizeof(uint8))); \
  var = reinterpret_cast<uint8*> \
      ((reinterpret_cast<intptr_t>(var##_mem) + 15) & (~0x0f));
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user