mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
ARGBToI420 C version match SIMD
Bug: libyuv:447
Change-Id: Iafb28cf635b355837caf41c26baee665642f4f95
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2181779
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
7a61759f78
commit
0b8bb60f2e
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1751
|
||||
Version: 1752
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1751
|
||||
#define LIBYUV_VERSION 1752
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -579,6 +579,15 @@ int NV21ToNV12(const uint8_t* src_y,
|
||||
if (dst_y) {
|
||||
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
}
|
||||
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
halfheight = (height + 1) >> 1;
|
||||
src_vu = src_vu + (halfheight - 1) * src_stride_vu;
|
||||
src_stride_vu = -src_stride_vu;
|
||||
}
|
||||
|
||||
SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
|
||||
@ -27,6 +27,12 @@ extern "C" {
|
||||
#define LIBYUV_RGB7 1
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(_M_IX86)
|
||||
#define LIBYUV_ARGBTOUV_PAVGB 1
|
||||
#define LIBYUV_RGBTOU_TRUNCATE 1
|
||||
#endif
|
||||
|
||||
// LLVM on x86 handles the ternary operator poorly, so use branchless min/max.
|
||||
|
||||
#define USE_BRANCHLESS 1
|
||||
@ -420,14 +426,36 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef LIBYUV_RGBTOU_TRUNCATE
|
||||
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
|
||||
}
|
||||
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
|
||||
}
|
||||
#else
|
||||
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
|
||||
}
|
||||
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
|
||||
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
|
||||
}
|
||||
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
|
||||
return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
// ARGBToY_C and ARGBToUV_C
|
||||
// Intel version mimics SSE/AVX, which does 2 pavgb
|
||||
#if LIBYUV_ARGBTOUV_PAVGB
|
||||
#define MAKEROWY(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
@ -442,15 +470,12 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
|
||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
|
||||
src_rgb1[B + BPP]) >> \
|
||||
2; \
|
||||
uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
|
||||
src_rgb1[G + BPP]) >> \
|
||||
2; \
|
||||
uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
|
||||
src_rgb1[R + BPP]) >> \
|
||||
2; \
|
||||
uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
|
||||
AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
|
||||
uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
|
||||
AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
|
||||
uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
|
||||
AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
|
||||
dst_u[0] = RGBToU(ar, ag, ab); \
|
||||
dst_v[0] = RGBToV(ar, ag, ab); \
|
||||
src_rgb0 += BPP * 2; \
|
||||
@ -459,13 +484,54 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
|
||||
uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
|
||||
uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
|
||||
uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \
|
||||
uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \
|
||||
uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \
|
||||
dst_u[0] = RGBToU(ar, ag, ab); \
|
||||
dst_v[0] = RGBToV(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// ARM version does sum / 2 then multiply by 2x smaller coefficients
|
||||
#define MAKEROWY(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
||||
int x; \
|
||||
for (x = 0; x < width; ++x) { \
|
||||
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
|
||||
src_argb0 += BPP; \
|
||||
dst_y += 1; \
|
||||
} \
|
||||
} \
|
||||
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
||||
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
|
||||
int x; \
|
||||
for (x = 0; x < width - 1; x += 2) { \
|
||||
uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
|
||||
src_rgb1[B + BPP] + 1) >> \
|
||||
1; \
|
||||
uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
|
||||
src_rgb1[G + BPP] + 1) >> \
|
||||
1; \
|
||||
uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
|
||||
src_rgb1[R + BPP] + 1) >> \
|
||||
1; \
|
||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||
src_rgb0 += BPP * 2; \
|
||||
src_rgb1 += BPP * 2; \
|
||||
dst_u += 1; \
|
||||
dst_v += 1; \
|
||||
} \
|
||||
if (width & 1) { \
|
||||
uint16_t ab = (src_rgb0[B] + src_rgb1[B]); \
|
||||
uint16_t ag = (src_rgb0[G] + src_rgb1[G]); \
|
||||
uint16_t ar = (src_rgb0[R] + src_rgb1[R]); \
|
||||
dst_u[0] = RGB2xToU(ar, ag, ab); \
|
||||
dst_v[0] = RGB2xToV(ar, ag, ab); \
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
MAKEROWY(ARGB, 2, 1, 0, 4)
|
||||
MAKEROWY(BGRA, 1, 2, 3, 4)
|
||||
@ -519,8 +585,6 @@ static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
|
||||
return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
|
||||
}
|
||||
|
||||
#define AVGB(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
// ARGBToYJ_C and ARGBToUVJ_C
|
||||
#define MAKEROWYJ(NAME, R, G, B, BPP) \
|
||||
void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
|
||||
|
||||
@ -32,8 +32,9 @@
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// arm version subsamples by summing 4 pixels then multiplying by matrix with
|
||||
// 4x smaller coefficients which are rounded to nearest integer.
|
||||
// arm version subsamples by summing 4 pixels, rounding divide by 2, then
|
||||
// multiplying by matrix with 2x smaller coefficients which are rounded
|
||||
// to nearest integer.
|
||||
#define ARM_YUV_ERROR 4
|
||||
#else
|
||||
#define ARM_YUV_ERROR 0
|
||||
@ -246,7 +247,7 @@ TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 3); \
|
||||
EXPECT_LE(max_diff, 0); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
|
||||
int abs_diff = abs( \
|
||||
@ -1008,30 +1009,28 @@ TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
|
||||
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
|
||||
benchmark_width_, DIFF, _Opt, +, 0)
|
||||
|
||||
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
|
||||
TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 0)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 0)
|
||||
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
|
||||
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
|
||||
#endif
|
||||
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
|
||||
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
|
||||
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
||||
#endif
|
||||
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
|
||||
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 0)
|
||||
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 0)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 0)
|
||||
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 0)
|
||||
|
||||
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
|
||||
SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||
@ -1072,7 +1071,7 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 4); \
|
||||
EXPECT_LE(max_diff, 0); \
|
||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||
for (int j = 0; j < kStrideUV * 2; ++j) { \
|
||||
int abs_diff = \
|
||||
@ -1083,7 +1082,7 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
EXPECT_LE(max_diff, 4); \
|
||||
EXPECT_LE(max_diff, 0); \
|
||||
free_aligned_buffer_page_end(dst_y_c); \
|
||||
free_aligned_buffer_page_end(dst_uv_c); \
|
||||
free_aligned_buffer_page_end(dst_y_opt); \
|
||||
|
||||
@ -3512,7 +3512,6 @@ TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
|
||||
// Round count up to multiple of 16
|
||||
int dst_width = (benchmark_width_ + 1) / 2;
|
||||
int dst_height = (benchmark_height_ + 1) / 2;
|
||||
align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
|
||||
@ -3529,15 +3528,11 @@ TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
|
||||
MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
|
||||
MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);
|
||||
|
||||
ScalePlane(src_pixels_u, benchmark_width_, benchmark_width_,
|
||||
benchmark_height_,
|
||||
|
||||
tmp_pixels_u, dst_width, dst_width, dst_height, kFilterBilinear);
|
||||
ScalePlane(src_pixels_v, benchmark_width_, benchmark_width_,
|
||||
benchmark_height_, tmp_pixels_v, dst_width, dst_width, dst_height,
|
||||
kFilterBilinear);
|
||||
MergeUVPlane(tmp_pixels_u, dst_width, tmp_pixels_v, dst_width,
|
||||
dst_pixels_uv_c, dst_width * 2, dst_width, dst_height);
|
||||
MaskCpuFlags(disable_cpu_flags_);
|
||||
HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
|
||||
benchmark_width_, dst_pixels_uv_c, dst_width * 2,
|
||||
benchmark_width_, benchmark_height_);
|
||||
MaskCpuFlags(benchmark_cpu_info_);
|
||||
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||
HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user