mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
RAWToJ400 and RGB24ToJ400 use 2-step row functions for Intel. RAWToJ400 was 3996 ms, now 3309 ms. 20.7% faster.
Call a row function for each row, based on ARGBToI400 code. But implement row functions as 2 step conversion. Adds the row functions: RAWToYJ, RGBToYJ, SSSE3 and AVX2 versions, and Any versions. The smaller row buffer is more cache friendly on large images. The max cache size can be configured, and is currently: // Maximum temporary width for wrappers to process at a time, in pixels. And the row buffer is SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); So 8192 bytes are used for the row buffer, leaving the rest for source and destination buffers. blaze-bin/third_party/libyuv/libyuv_test '--gunit_filter=*R*To?400_Opt' --libyuv_width=3600 --libyuv_height=2500 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1 | sortms Was RAWToJ400_Opt (3996 ms) ARGBToI400_Opt (3964 ms) RGB24ToJ400_Opt (3960 ms) ARGBToJ400_Opt (3909 ms) RGBAToJ400_Opt (3885 ms) Now ARGBToJ400_Opt (4091 ms) ARGBToI400_Opt (3936 ms) RGBAToJ400_Opt (3428 ms) RGB24ToJ400_Opt (3324 ms) RAWToJ400_Opt (3309 ms) Bug: libyuv:854, b/147753855 Change-Id: Ieb65fbda94e812c737f4c3c74107354b73c4bcd2 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2016203 Reviewed-by: richard winterton <rrwinterton@gmail.com> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
1cea4235af
commit
3db22ebc4b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1743
|
||||
Version: 1744
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -123,6 +123,8 @@ extern "C" {
|
||||
#define HAS_RAWTOYROW_SSSE3
|
||||
#define HAS_RGB24TOARGBROW_SSSE3
|
||||
#define HAS_RGB24TOYROW_SSSE3
|
||||
#define HAS_RGB24TOYJROW_SSSE3
|
||||
#define HAS_RAWTOYJROW_SSSE3
|
||||
#define HAS_RGB565TOARGBROW_SSE2
|
||||
#define HAS_RGBATOUVROW_SSSE3
|
||||
#define HAS_RGBATOYROW_SSSE3
|
||||
@ -194,6 +196,8 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVROW_AVX2
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_RGB24TOYJROW_AVX2
|
||||
#define HAS_RAWTOYJROW_AVX2
|
||||
#define HAS_COPYROW_AVX
|
||||
#define HAS_H422TOARGBROW_AVX2
|
||||
#define HAS_HALFFLOATROW_AVX2
|
||||
@ -973,7 +977,11 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
||||
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
@ -1187,8 +1195,12 @@ void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1743
|
||||
#define LIBYUV_VERSION 1744
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -2380,27 +2380,38 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
|
||||
defined(HAS_RGB24TOYJROW_MMI))
|
||||
void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) =
|
||||
RGB24ToYJRow_C;
|
||||
#else
|
||||
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||
RGB24ToARGBRow_C;
|
||||
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
||||
ARGBToYJRow_C;
|
||||
#endif
|
||||
if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
|
||||
src_stride_rgb24 = -src_stride_rgb24;
|
||||
}
|
||||
|
||||
// Neon version does direct RGB24 to YJ.
|
||||
// Coalesce rows.
|
||||
if (src_stride_rgb24 == width * 3 && dst_stride_yj == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_rgb24 = dst_stride_yj = 0;
|
||||
}
|
||||
#if defined(HAS_RGB24TOYJROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
|
||||
@ -2408,83 +2419,28 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
|
||||
RGB24ToYJRow = RGB24ToYJRow_NEON;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB24TOYJROW_MSA)
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_MSA;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB24TOYJROW_MMI)
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYJROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToYJRow = RGB24ToYJRow_MMI;
|
||||
}
|
||||
}
|
||||
// Other platforms do intermediate conversion from RGB24 to ARGB.
|
||||
#else
|
||||
#if defined(HAS_RGB24TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYJRow = ARGBToYJRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYJRow = ARGBToYJRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
{
|
||||
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
|
||||
defined(HAS_RGB24TOYJROW_MMI))
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
|
||||
defined(HAS_RGB24TOYJROW_MMI))
|
||||
RGB24ToYJRow(src_rgb24, dst_yj, width);
|
||||
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
|
||||
ARGBToYJRow(row, dst_yj, width);
|
||||
ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width);
|
||||
#endif
|
||||
src_rgb24 += src_stride_rgb24 * 2;
|
||||
dst_yj += dst_stride_yj * 2;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
|
||||
defined(HAS_RGB24TOYJROW_MMI))
|
||||
RGB24ToYJRow(src_rgb24, dst_yj, width);
|
||||
#else
|
||||
RGB24ToARGBRow(src_rgb24, row, width);
|
||||
ARGBToYJRow(row, dst_yj, width);
|
||||
#endif
|
||||
}
|
||||
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
|
||||
defined(HAS_RGB24TOYJROW_MMI))
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToYJRow(src_rgb24, dst_yj, width);
|
||||
src_rgb24 += src_stride_rgb24;
|
||||
dst_yj += dst_stride_yj;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -2498,27 +2454,38 @@ int RAWToJ400(const uint8_t* src_raw,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
|
||||
defined(HAS_RAWTOYJROW_MMI))
|
||||
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_yj, int width) =
|
||||
RAWToYJRow_C;
|
||||
#else
|
||||
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||
RAWToARGBRow_C;
|
||||
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
||||
ARGBToYJRow_C;
|
||||
#endif
|
||||
if (!src_raw || !dst_yj || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_raw = src_raw + (height - 1) * src_stride_raw;
|
||||
src_stride_raw = -src_stride_raw;
|
||||
}
|
||||
|
||||
// Neon version does direct RAW to YJ.
|
||||
// Coalesce rows.
|
||||
if (src_stride_raw == width * 3 && dst_stride_yj == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_raw = dst_stride_yj = 0;
|
||||
}
|
||||
#if defined(HAS_RAWTOYJROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYJRow = RAWToYJRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_NEON;
|
||||
@ -2526,83 +2493,28 @@ int RAWToJ400(const uint8_t* src_raw,
|
||||
RAWToYJRow = RAWToYJRow_NEON;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RAWTOYJROW_MSA)
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToYJRow = RAWToYJRow_MSA;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RAWTOYJROW_MMI)
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYJROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
RAWToYJRow = RAWToYJRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToYJRow = RAWToYJRow_MMI;
|
||||
}
|
||||
}
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else
|
||||
#if defined(HAS_RAWTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToARGBRow = RAWToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYJRow = ARGBToYJRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYJRow = ARGBToYJRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
{
|
||||
#if !(defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
|
||||
defined(HAS_RAWTOYJROW_MMI))
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
|
||||
defined(HAS_RAWTOYJROW_MMI))
|
||||
RAWToYJRow(src_raw, dst_yj, width);
|
||||
RAWToYJRow(src_raw + src_stride_raw, dst_yj + dst_stride_yj, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
|
||||
ARGBToYJRow(row, dst_yj, width);
|
||||
ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width);
|
||||
#endif
|
||||
src_raw += src_stride_raw * 2;
|
||||
dst_yj += dst_stride_yj * 2;
|
||||
}
|
||||
if (height & 1) {
|
||||
#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
|
||||
defined(HAS_RAWTOYJROW_MMI))
|
||||
RAWToYJRow(src_raw, dst_yj, width);
|
||||
#else
|
||||
RAWToARGBRow(src_raw, row, width);
|
||||
ARGBToYJRow(row, dst_yj, width);
|
||||
#endif
|
||||
}
|
||||
#if !(defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
|
||||
defined(HAS_RAWTOYJROW_MMI))
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToYJRow(src_raw, dst_yj, width);
|
||||
src_raw += src_stride_raw;
|
||||
dst_yj += dst_stride_yj;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -695,6 +695,12 @@ ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
|
||||
#ifdef HAS_RGB24TOYROW_NEON
|
||||
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_AVX2
|
||||
ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_SSSE3
|
||||
ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYJROW_NEON
|
||||
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7)
|
||||
#endif
|
||||
@ -707,6 +713,12 @@ ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
|
||||
#ifdef HAS_RAWTOYROW_NEON
|
||||
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_AVX2
|
||||
ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_SSSE3
|
||||
ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYJROW_NEON
|
||||
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7)
|
||||
#endif
|
||||
|
||||
@ -3307,6 +3307,70 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_RGB24TOYJROW_AVX2
|
||||
// Convert width RGB24 pixels (3 bytes each) to YJ values, in chunks of up to MAXTWIDTH pixels via an intermediate ARGB row.
|
||||
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
|
||||
ARGBToYJRow_AVX2(row, dst_yj, twidth);
|
||||
src_rgb24 += twidth * 3;
|
||||
dst_yj += twidth;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif //HAS_RGB24TOYJROW_AVX2
|
||||
|
||||
#ifdef HAS_RAWTOYJROW_AVX2
|
||||
// Convert width RAW pixels (3 bytes each) to YJ values, in chunks of up to MAXTWIDTH pixels via an intermediate ARGB row.
|
||||
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
RAWToARGBRow_SSSE3(src_raw, row, twidth);
|
||||
ARGBToYJRow_AVX2(row, dst_yj, twidth);
|
||||
src_raw += twidth * 3;
|
||||
dst_yj += twidth;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif //HAS_RAWTOYJROW_AVX2
|
||||
|
||||
#ifdef HAS_RGB24TOYJROW_SSSE3
|
||||
// Convert width RGB24 pixels (3 bytes each) to YJ values, in chunks of up to MAXTWIDTH pixels via an intermediate ARGB row.
|
||||
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
|
||||
ARGBToYJRow_SSSE3(row, dst_yj, twidth);
|
||||
src_rgb24 += twidth * 3;
|
||||
dst_yj += twidth;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif //HAS_RGB24TOYJROW_SSSE3
|
||||
|
||||
#ifdef HAS_RAWTOYJROW_SSSE3
|
||||
// Convert width RAW pixels (3 bytes each) to YJ values, in chunks of up to MAXTWIDTH pixels via an intermediate ARGB row.
|
||||
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
RAWToARGBRow_SSSE3(src_raw, row, twidth);
|
||||
ARGBToYJRow_SSSE3(row, dst_yj, twidth);
|
||||
src_raw += twidth * 3;
|
||||
dst_yj += twidth;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif //HAS_RAWTOYJROW_SSSE3
|
||||
|
||||
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
|
||||
float fsum = 0.f;
|
||||
int i;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user