mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Any-width row functions: allow AVX2 to process 32 ARGB pixels at a time
R=harryjin@google.com
BUG=libyuv:461
Review URL: https://webrtc-codereview.appspot.com/54779004.
This commit is contained in:
parent
f4705d56e7
commit
9487b9d6d8
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1442
|
||||
Version: 1443
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1442
|
||||
#define LIBYUV_VERSION 1443
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -224,16 +224,16 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
|
||||
// Any 1 to 1.
|
||||
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 2]); \
|
||||
memset(temp, 0, 64); /* for YUY2 and msan */ \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 64, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
ANY_SIMD(temp, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_COPYROW_AVX
|
||||
@ -593,28 +593,29 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
|
||||
#undef ANY12
|
||||
|
||||
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
|
||||
// 128 byte row allows for 32 avx ARGB pixels.
|
||||
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \
|
||||
uint8* dst_u, uint8* dst_v, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 4]); \
|
||||
memset(temp, 0, 128 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
memcpy(temp + 64, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
|
||||
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
|
||||
SS(r, UVSHIFT) * BPP); \
|
||||
if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \
|
||||
memcpy(temp + SS(r, UVSHIFT) * BPP, \
|
||||
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \
|
||||
memcpy(temp + 64 + SS(r, UVSHIFT) * BPP, \
|
||||
temp + 64 + SS(r, UVSHIFT) * BPP - BPP, 4); \
|
||||
memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
|
||||
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \
|
||||
} \
|
||||
ANY_SIMD(temp, 64, temp + 128, temp + 192, MASK + 1); \
|
||||
memcpy(dst_u + (n >> 1), temp + 128, SS(r, 1)); \
|
||||
memcpy(dst_v + (n >> 1), temp + 192, SS(r, 1)); \
|
||||
ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
|
||||
memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
|
||||
memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||
|
||||
@ -34,7 +34,7 @@ namespace libyuv {
|
||||
#define ERROR_FULL 5
|
||||
#endif
|
||||
|
||||
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF, CDIFF) \
|
||||
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
|
||||
TEST_F(libyuvTest, TESTNAME) { \
|
||||
const int kPixels = benchmark_width_ * benchmark_height_; \
|
||||
const int kHalfPixels = ((benchmark_width_ + 1) / 2) * \
|
||||
@ -93,7 +93,7 @@ TEST_F(libyuvTest, TESTNAME) { \
|
||||
temp_v, (benchmark_width_ + 1) / 2, \
|
||||
benchmark_width_, benchmark_height_); \
|
||||
\
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
YUVTOARGB(temp_y, benchmark_width_, \
|
||||
temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, \
|
||||
@ -110,7 +110,7 @@ TEST_F(libyuvTest, TESTNAME) { \
|
||||
} \
|
||||
/* Test C and SIMD match. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
EXPECT_NEAR(dst_pixels_c[i], dst_pixels_opt[i], CDIFF); \
|
||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
|
||||
} \
|
||||
/* Test SIMD is close to original. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
@ -129,10 +129,10 @@ TEST_F(libyuvTest, TESTNAME) { \
|
||||
free_aligned_buffer_64(dst_pixels_c); \
|
||||
} \
|
||||
|
||||
TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL, 0)
|
||||
TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL, 0)
|
||||
TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3, 0)
|
||||
TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3, 0)
|
||||
TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL)
|
||||
TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL)
|
||||
TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3)
|
||||
TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3)
|
||||
|
||||
static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) {
|
||||
const int kWidth = 16;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user