I420ToRGB24MatrixFilter function added

- Implemented as 3 steps: Upsample UV to 4:4:4, I444ToARGB, ARGBToRGB24
- Fix some build warnings for missing prototypes.

Pixel 4
I420ToRGB24_Opt (743 ms)
I420ToRGB24Filter_Opt (1331 ms)

Windows with skylake xeon:
x86 32 bit
I420ToRGB24_Opt (387 ms)
I420ToRGB24Filter_Opt (571 ms)
x64 64 bit
I420ToRGB24_Opt (384 ms)
I420ToRGB24Filter_Opt (582 ms)


Bug: libyuv:938, libyuv:830
Change-Id: Ie27f70816ec084437014f8a1c630ae011ee2348c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3900298
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2022-09-16 11:12:39 -07:00 committed by Frank Barchard
parent 3e38ce5058
commit f71c83552d
19 changed files with 549 additions and 298 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1840 Version: 1841
License: BSD License: BSD
License File: LICENSE License File: LICENSE

View File

@ -1975,6 +1975,21 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
int height, int height,
enum FilterMode filter); enum FilterMode filter);
// Convert I420 to RGB24 with matrix and UV filter mode.
LIBYUV_API
int I420ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter);
// Convert I010 to AR30 with matrix and UV filter mode. // Convert I010 to AR30 with matrix and UV filter mode.
LIBYUV_API LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y, int I010ToAR30MatrixFilter(const uint16_t* src_y,

View File

@ -1824,6 +1824,11 @@ void RGBAToUVRow_C(const uint8_t* src_rgb,
uint8_t* dst_u, uint8_t* dst_u,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void RGBAToUVJRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void RGB24ToUVRow_C(const uint8_t* src_rgb, void RGB24ToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb, int src_stride_rgb,
uint8_t* dst_u, uint8_t* dst_u,
@ -2044,11 +2049,11 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv,
uint8_t* dst_v, uint8_t* dst_v,
int width); int width);
void DetileToYUY2_C(const uint8_t* src_y, void DetileToYUY2_C(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width); int width);
void DetileToYUY2_SSE2(const uint8_t* src_y, void DetileToYUY2_SSE2(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
@ -5608,6 +5613,17 @@ void GaussCol_F32_C(const float* src0,
float* dst, float* dst,
int width); int width);
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
void GaussCol_C(const uint16_t* src0,
const uint16_t* src1,
const uint16_t* src2,
const uint16_t* src3,
const uint16_t* src4,
uint32_t* dst,
int width);
void ClampFloatToZero_SSE2(const float* src_x, float* dst_y, int width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1840 #define LIBYUV_VERSION 1841
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -727,7 +727,7 @@ int MM21ToYUY2(const uint8_t* src_y,
} }
DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2, DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2,
dst_stride_yuy2, width, height, 32); dst_stride_yuy2, width, height, 32);
return 0; return 0;
} }
@ -2054,8 +2054,8 @@ int RGB24ToI420(const uint8_t* src_rgb24,
{ {
#if !defined(HAS_RGB24TOYROW) #if !defined(HAS_RGB24TOYROW)
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2065,10 +2065,10 @@ int RGB24ToI420(const uint8_t* src_rgb24,
RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else #else
RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_rgb24 += src_stride_rgb24 * 2; src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -2208,8 +2208,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
{ {
#if !defined(HAS_RGB24TOYJROW) #if !defined(HAS_RGB24TOYJROW)
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2219,10 +2219,10 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
#else #else
RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24, row, width);
RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width);
ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_rgb24 += src_stride_rgb24 * 2; src_rgb24 += src_stride_rgb24 * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -2382,8 +2382,8 @@ int RAWToI420(const uint8_t* src_raw,
{ {
#if !defined(HAS_RAWTOYROW) #if !defined(HAS_RAWTOYROW)
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2393,10 +2393,10 @@ int RAWToI420(const uint8_t* src_raw,
RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else #else
RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_raw += src_stride_raw * 2; src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -2536,8 +2536,8 @@ int RAWToJ420(const uint8_t* src_raw,
{ {
#if !defined(HAS_RAWTOYJROW) #if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2547,10 +2547,10 @@ int RAWToJ420(const uint8_t* src_raw,
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else #else
RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVJRow(row, row_size, dst_u, dst_v, width);
ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_raw += src_stride_raw * 2; src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -2714,8 +2714,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@ -2725,10 +2725,10 @@ int RGB565ToI420(const uint8_t* src_rgb565,
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
#else #else
RGB565ToARGBRow(src_rgb565, row, width); RGB565ToARGBRow(src_rgb565, row, width);
RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + row_size, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_rgb565 += src_stride_rgb565 * 2; src_rgb565 += src_stride_rgb565 * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -2894,8 +2894,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2907,11 +2907,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
width); width);
#else #else
ARGB1555ToARGBRow(src_argb1555, row, width); ARGB1555ToARGBRow(src_argb1555, row, width);
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + row_size,
width); width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_argb1555 += src_stride_argb1555 * 2; src_argb1555 += src_stride_argb1555 * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
@ -3089,8 +3089,8 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
{ {
#if !(defined(HAS_ARGB4444TOYROW_NEON)) #if !(defined(HAS_ARGB4444TOYROW_NEON))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -3101,11 +3101,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
width); width);
#else #else
ARGB4444ToARGBRow(src_argb4444, row, width); ARGB4444ToARGBRow(src_argb4444, row, width);
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + row_size,
width); width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToUVRow(row, row_size, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_argb4444 += src_stride_argb4444 * 2; src_argb4444 += src_stride_argb4444 * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;

View File

@ -7,8 +7,10 @@
* in the file PATENTS. All contributing project authors may * in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "libyuv/convert_argb.h" #include "libyuv/convert_argb.h"
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG #ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h" #include "libyuv/mjpeg_decoder.h"
@ -5497,22 +5499,22 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
#endif #endif
// alloc 4 lines temp // alloc 4 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 4); align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row; uint8_t* temp_u_1 = row;
uint8_t* temp_u_2 = row + kRowSize; uint8_t* temp_u_2 = row + row_size;
uint8_t* temp_v_1 = row + kRowSize * 2; uint8_t* temp_v_1 = row + row_size * 2;
uint8_t* temp_v_2 = row + kRowSize * 3; uint8_t* temp_v_2 = row + row_size * 3;
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
@ -5524,8 +5526,8 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y,
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I444ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
} }
@ -5622,10 +5624,10 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row; uint8_t* temp_u = row;
uint8_t* temp_v = row + kRowSize; uint8_t* temp_v = row + row_size;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ScaleRowUp(src_u, temp_u, width); ScaleRowUp(src_u, temp_u, width);
@ -5641,6 +5643,188 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y,
return 0; return 0;
} }
static int I420ToRGB24MatrixBilinear(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height) {
int y;
void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
const uint8_t* v_buf, uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I444ToARGBRow_C;
void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
ARGBToRGB24Row_C;
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_Any_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24;
dst_stride_rgb24 = -dst_stride_rgb24;
}
#if defined(HAS_I444TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I444TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I444ToARGBRow = I444ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I444ToARGBRow = I444ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I444ToARGBRow = I444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I444ToARGBRow = I444ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_MSA;
}
}
#endif
#if defined(HAS_I444TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I444ToARGBRow = I444ToARGBRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
I444ToARGBRow = I444ToARGBRow_LASX;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToRGB24Row = ARGBToRGB24Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
if (TestCpuFlag(kCpuHasAVX512VBMI)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI;
if (IS_ALIGNED(width, 32)) {
ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_NEON;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_MSA;
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ARGBToRGB24Row = ARGBToRGB24Row_LASX;
}
}
#endif
// TODO: Fix HAS macros to match function names
#if defined(HAS_SCALEROWUP2_LINEAR_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
}
#endif
#if defined(HAS_SCALEROWUP2_LINEAR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
}
#endif
// alloc 4 lines temp
const int row_size = (width + 31) & ~31;
align_buffer_64(row, row_size * 8);
uint8_t* temp_u_1 = row;
uint8_t* temp_u_2 = row + row_size;
uint8_t* temp_v_1 = row + row_size * 2;
uint8_t* temp_v_2 = row + row_size * 3;
uint8_t* temp_argb = row + row_size * 4;
Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width);
ARGBToRGB24Row(temp_argb, dst_rgb24, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width);
ARGBToRGB24Row(temp_argb, dst_rgb24, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
I444ToARGBRow(src_y, temp_u_2, temp_v_2, temp_argb, yuvconstants, width);
ARGBToRGB24Row(temp_argb, dst_rgb24, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444ToARGBRow(src_y, temp_u_1, temp_v_1, temp_argb, yuvconstants, width);
ARGBToRGB24Row(temp_argb, dst_rgb24, width);
}
free_aligned_buffer_64(row);
return 0;
}
static int I010ToAR30MatrixBilinear(const uint16_t* src_y, static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
int src_stride_y, int src_stride_y,
const uint16_t* src_u, const uint16_t* src_u,
@ -5705,22 +5889,22 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
#endif #endif
// alloc 4 lines temp // alloc 4 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row); uint16_t* temp_u_1 = (uint16_t*)(row);
uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30; dst_ar30 += dst_stride_ar30;
src_y += src_stride_y; src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30; dst_ar30 += dst_stride_ar30;
src_y += src_stride_y; src_y += src_stride_y;
@ -5732,8 +5916,8 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y,
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width); I410ToAR30Row(src_y, temp_u_1, temp_v_1, dst_ar30, yuvconstants, width);
} }
@ -5803,10 +5987,10 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_u = (uint16_t*)(row);
uint16_t* temp_v = (uint16_t*)(row) + kRowSize; uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ScaleRowUp(src_u, temp_u, width); ScaleRowUp(src_u, temp_u, width);
@ -5885,22 +6069,22 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
#endif #endif
// alloc 4 lines temp // alloc 4 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row); uint16_t* temp_u_1 = (uint16_t*)(row);
uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
@ -5912,8 +6096,8 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y,
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width); I410ToARGBRow(src_y, temp_u_1, temp_v_1, dst_argb, yuvconstants, width);
} }
@ -5982,10 +6166,10 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_u = (uint16_t*)(row);
uint16_t* temp_v = (uint16_t*)(row) + kRowSize; uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ScaleRowUp(src_u, temp_u, width); ScaleRowUp(src_u, temp_u, width);
@ -6134,15 +6318,15 @@ static int I420AlphaToARGBMatrixBilinear(
#endif #endif
// alloc 4 lines temp // alloc 4 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 4); align_buffer_64(row, row_size * 4);
uint8_t* temp_u_1 = row; uint8_t* temp_u_1 = row;
uint8_t* temp_u_2 = row + kRowSize; uint8_t* temp_u_2 = row + row_size;
uint8_t* temp_v_1 = row + kRowSize * 2; uint8_t* temp_v_1 = row + row_size * 2;
uint8_t* temp_v_2 = row + kRowSize * 3; uint8_t* temp_v_2 = row + row_size * 3;
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6153,8 +6337,8 @@ static int I420AlphaToARGBMatrixBilinear(
src_a += src_stride_a; src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6176,8 +6360,8 @@ static int I420AlphaToARGBMatrixBilinear(
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I444AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6317,10 +6501,10 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
uint8_t* temp_u = row; uint8_t* temp_u = row;
uint8_t* temp_v = row + kRowSize; uint8_t* temp_v = row + row_size;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ScaleRowUp(src_u, temp_u, width); ScaleRowUp(src_u, temp_u, width);
@ -6445,15 +6629,15 @@ static int I010AlphaToARGBMatrixBilinear(
#endif #endif
// alloc 4 lines temp // alloc 4 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 4 * sizeof(uint16_t)); align_buffer_64(row, row_size * 4 * sizeof(uint16_t));
uint16_t* temp_u_1 = (uint16_t*)(row); uint16_t* temp_u_1 = (uint16_t*)(row);
uint16_t* temp_u_2 = (uint16_t*)(row) + kRowSize; uint16_t* temp_u_2 = (uint16_t*)(row) + row_size;
uint16_t* temp_v_1 = (uint16_t*)(row) + kRowSize * 2; uint16_t* temp_v_1 = (uint16_t*)(row) + row_size * 2;
uint16_t* temp_v_2 = (uint16_t*)(row) + kRowSize * 3; uint16_t* temp_v_2 = (uint16_t*)(row) + row_size * 3;
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6464,8 +6648,8 @@ static int I010AlphaToARGBMatrixBilinear(
src_a += src_stride_a; src_a += src_stride_a;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_u, src_stride_u, temp_u_1, kRowSize, width); Scale2RowUp(src_u, src_stride_u, temp_u_1, row_size, width);
Scale2RowUp(src_v, src_stride_v, temp_v_1, kRowSize, width); Scale2RowUp(src_v, src_stride_v, temp_v_1, row_size, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6487,8 +6671,8 @@ static int I010AlphaToARGBMatrixBilinear(
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_u, 0, temp_u_1, kRowSize, width); Scale2RowUp(src_u, 0, temp_u_1, row_size, width);
Scale2RowUp(src_v, 0, temp_v_1, kRowSize, width); Scale2RowUp(src_v, 0, temp_v_1, row_size, width);
I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants, I410AlphaToARGBRow(src_y, temp_u_1, temp_v_1, src_a, dst_argb, yuvconstants,
width); width);
if (attenuate) { if (attenuate) {
@ -6600,10 +6784,10 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (width + 31) & ~31; const int row_size = (width + 31) & ~31;
align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_u = (uint16_t*)(row); uint16_t* temp_u = (uint16_t*)(row);
uint16_t* temp_v = (uint16_t*)(row) + kRowSize; uint16_t* temp_v = (uint16_t*)(row) + row_size;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ScaleRowUp(src_u, temp_u, width); ScaleRowUp(src_u, temp_u, width);
@ -6684,18 +6868,18 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (2 * width + 31) & ~31; const int row_size = (2 * width + 31) & ~31;
align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_1 = (uint16_t*)(row);
uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
@ -6706,7 +6890,7 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y,
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width);
P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width); P410ToARGBRow(src_y, temp_uv_1, dst_argb, yuvconstants, width);
} }
@ -6773,8 +6957,8 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y,
} }
#endif #endif
const int kRowSize = (2 * width + 31) & ~31; const int row_size = (2 * width + 31) & ~31;
align_buffer_64(row, kRowSize * sizeof(uint16_t)); align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row); uint16_t* temp_uv = (uint16_t*)(row);
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -6850,18 +7034,18 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
#endif #endif
// alloc 2 lines temp // alloc 2 lines temp
const int kRowSize = (2 * width + 31) & ~31; const int row_size = (2 * width + 31) & ~31;
align_buffer_64(row, kRowSize * 2 * sizeof(uint16_t)); align_buffer_64(row, row_size * 2 * sizeof(uint16_t));
uint16_t* temp_uv_1 = (uint16_t*)(row); uint16_t* temp_uv_1 = (uint16_t*)(row);
uint16_t* temp_uv_2 = (uint16_t*)(row) + kRowSize; uint16_t* temp_uv_2 = (uint16_t*)(row) + row_size;
Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30; dst_ar30 += dst_stride_ar30;
src_y += src_stride_y; src_y += src_stride_y;
for (y = 0; y < height - 2; y += 2) { for (y = 0; y < height - 2; y += 2) {
Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, src_stride_uv, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
dst_ar30 += dst_stride_ar30; dst_ar30 += dst_stride_ar30;
src_y += src_stride_y; src_y += src_stride_y;
@ -6872,7 +7056,7 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y,
} }
if (!(height & 1)) { if (!(height & 1)) {
Scale2RowUp(src_uv, 0, temp_uv_1, kRowSize, width); Scale2RowUp(src_uv, 0, temp_uv_1, row_size, width);
P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width); P410ToAR30Row(src_y, temp_uv_1, dst_ar30, yuvconstants, width);
} }
@ -6939,8 +7123,8 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y,
} }
#endif #endif
const int kRowSize = (2 * width + 31) & ~31; const int row_size = (2 * width + 31) & ~31;
align_buffer_64(row, kRowSize * sizeof(uint16_t)); align_buffer_64(row, row_size * sizeof(uint16_t));
uint16_t* temp_uv = (uint16_t*)(row); uint16_t* temp_uv = (uint16_t*)(row);
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
@ -7015,6 +7199,37 @@ int I422ToARGBMatrixFilter(const uint8_t* src_y,
return -1; return -1;
} }
LIBYUV_API
int I420ToRGB24MatrixFilter(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_u,
int src_stride_u,
const uint8_t* src_v,
int src_stride_v,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
const struct YuvConstants* yuvconstants,
int width,
int height,
enum FilterMode filter) {
switch (filter) {
case kFilterNone:
return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_rgb24, dst_stride_rgb24,
yuvconstants, width, height);
case kFilterBilinear:
case kFilterBox:
return I420ToRGB24MatrixBilinear(
src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
dst_rgb24, dst_stride_rgb24, yuvconstants, width, height);
case kFilterLinear:
// TODO: Implement Linear using Bilinear with Scale2RowUp stride 0
return -1;
}
return -1;
}
LIBYUV_API LIBYUV_API
int I010ToAR30MatrixFilter(const uint16_t* src_y, int I010ToAR30MatrixFilter(const uint16_t* src_y,
int src_stride_y, int src_stride_y,

View File

@ -1866,7 +1866,7 @@ int ARGBToJ420(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_uj, uint8_t* dst_vj, int width) = uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C; ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
@ -2238,7 +2238,7 @@ int ABGRToJ420(const uint8_t* src_abgr,
int height) { int height) {
int y; int y;
void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_uj, uint8_t* dst_vj, int width) = uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ABGRToUVJRow_C; ABGRToUVJRow_C;
void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
ABGRToYJRow_C; ABGRToYJRow_C;
@ -2804,8 +2804,8 @@ int RAWToJNV21(const uint8_t* src_raw,
uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31);
#if !defined(HAS_RAWTOYJROW) #if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int row_size = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
@ -2816,11 +2816,11 @@ int RAWToJNV21(const uint8_t* src_raw,
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else #else
RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width);
ARGBToUVJRow(row, kRowSize, row_uj, row_vj, width); ARGBToUVJRow(row, row_size, row_uj, row_vj, width);
MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width);
#endif #endif
src_raw += src_stride_raw * 2; src_raw += src_stride_raw * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;

View File

@ -115,7 +115,7 @@ void CpuId(int eax, int ecx, int* cpu_info) {
defined(__x86_64__)) && \ defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__) !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int GetXCR0() { static int GetXCR0() {
int xcr0 = 0; int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT

View File

@ -1035,20 +1035,20 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
LIBYUV_API LIBYUV_API
void DetileToYUY2(const uint8_t* src_y, void DetileToYUY2(const uint8_t* src_y,
int src_stride_y, int src_stride_y,
const uint8_t* src_uv, const uint8_t* src_uv,
int src_stride_uv, int src_stride_uv,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int dst_stride_yuy2, int dst_stride_yuy2,
int width, int width,
int height, int height,
int tile_height) { int tile_height) {
const ptrdiff_t src_y_tile_stride = 16 * tile_height; const ptrdiff_t src_y_tile_stride = 16 * tile_height;
const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2; const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
int y; int y;
void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, int width) = DetileToYUY2_C; uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
assert(src_stride_y >= 0); assert(src_stride_y >= 0);
assert(src_stride_y > 0); assert(src_stride_y > 0);
assert(src_stride_uv >= 0); assert(src_stride_uv >= 0);
@ -1085,8 +1085,8 @@ void DetileToYUY2(const uint8_t* src_y,
// Detile plane // Detile plane
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2,
dst_yuy2, width); width);
dst_yuy2 += dst_stride_yuy2; dst_yuy2 += dst_stride_yuy2;
src_y += 16; src_y += 16;
@ -1224,18 +1224,18 @@ void MergeRGBPlane(const uint8_t* src_r,
} }
LIBYUV_NOINLINE LIBYUV_NOINLINE
void SplitARGBPlaneAlpha(const uint8_t* src_argb, static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
int src_stride_argb, int src_stride_argb,
uint8_t* dst_r, uint8_t* dst_r,
int dst_stride_r, int dst_stride_r,
uint8_t* dst_g, uint8_t* dst_g,
int dst_stride_g, int dst_stride_g,
uint8_t* dst_b, uint8_t* dst_b,
int dst_stride_b, int dst_stride_b,
uint8_t* dst_a, uint8_t* dst_a,
int dst_stride_a, int dst_stride_a,
int width, int width,
int height) { int height) {
int y; int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) = uint8_t* dst_b, uint8_t* dst_a, int width) =
@ -1295,16 +1295,16 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
} }
LIBYUV_NOINLINE LIBYUV_NOINLINE
void SplitARGBPlaneOpaque(const uint8_t* src_argb, static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
int src_stride_argb, int src_stride_argb,
uint8_t* dst_r, uint8_t* dst_r,
int dst_stride_r, int dst_stride_r,
uint8_t* dst_g, uint8_t* dst_g,
int dst_stride_g, int dst_stride_g,
uint8_t* dst_b, uint8_t* dst_b,
int dst_stride_b, int dst_stride_b,
int width, int width,
int height) { int height) {
int y; int y;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C; uint8_t* dst_b, int width) = SplitXRGBRow_C;
@ -1396,18 +1396,18 @@ void SplitARGBPlane(const uint8_t* src_argb,
} }
LIBYUV_NOINLINE LIBYUV_NOINLINE
void MergeARGBPlaneAlpha(const uint8_t* src_r, static void MergeARGBPlaneAlpha(const uint8_t* src_r,
int src_stride_r, int src_stride_r,
const uint8_t* src_g, const uint8_t* src_g,
int src_stride_g, int src_stride_g,
const uint8_t* src_b, const uint8_t* src_b,
int src_stride_b, int src_stride_b,
const uint8_t* src_a, const uint8_t* src_a,
int src_stride_a, int src_stride_a,
uint8_t* dst_argb, uint8_t* dst_argb,
int dst_stride_argb, int dst_stride_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a, const uint8_t* src_b, const uint8_t* src_a,
@ -1458,16 +1458,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
} }
LIBYUV_NOINLINE LIBYUV_NOINLINE
void MergeARGBPlaneOpaque(const uint8_t* src_r, static void MergeARGBPlaneOpaque(const uint8_t* src_r,
int src_stride_r, int src_stride_r,
const uint8_t* src_g, const uint8_t* src_g,
int src_stride_g, int src_stride_g,
const uint8_t* src_b, const uint8_t* src_b,
int src_stride_b, int src_stride_b,
uint8_t* dst_argb, uint8_t* dst_argb,
int dst_stride_argb, int dst_stride_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) = const uint8_t* src_b, uint8_t* dst_argb, int width) =
@ -4545,16 +4545,16 @@ static int ARGBSobelize(const uint8_t* src_argb,
#endif #endif
{ {
// 3 rows with edges before/after. // 3 rows with edges before/after.
const int kRowSize = (width + kEdge + 31) & ~31; const int row_size = (width + kEdge + 31) & ~31;
align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
uint8_t* row_sobelx = rows; uint8_t* row_sobelx = rows;
uint8_t* row_sobely = rows + kRowSize; uint8_t* row_sobely = rows + row_size;
uint8_t* row_y = rows + kRowSize * 2; uint8_t* row_y = rows + row_size * 2;
// Convert first row. // Convert first row.
uint8_t* row_y0 = row_y + kEdge; uint8_t* row_y0 = row_y + kEdge;
uint8_t* row_y1 = row_y0 + kRowSize; uint8_t* row_y1 = row_y0 + row_size;
uint8_t* row_y2 = row_y1 + kRowSize; uint8_t* row_y2 = row_y1 + row_size;
ARGBToYJRow(src_argb, row_y0, width); ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0]; row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.

View File

@ -8,11 +8,12 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "libyuv/rotate.h" #include "libyuv/rotate_argb.h"
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ #include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */

View File

@ -2749,11 +2749,11 @@ void DetileSplitUVRow_C(const uint8_t* src_uv,
} }
void DetileToYUY2_C(const uint8_t* src_y, void DetileToYUY2_C(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
for (int x = 0; x < width - 15; x += 16) { for (int x = 0; x < width - 15; x += 16) {
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
dst_yuy2[0] = src_y[0]; dst_yuy2[0] = src_y[0];

View File

@ -4977,19 +4977,19 @@ void DetileToYUY2_SSE2(const uint8_t* src_y,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile( asm volatile(
"1: \n" "1: \n"
"movdqu (%0),%%xmm0 \n" // Load 16 Y "movdqu (%0),%%xmm0 \n" // Load 16 Y
"sub $0x10,%3 \n" "sub $0x10,%3 \n"
"lea (%0,%4),%0 \n" "lea (%0,%4),%0 \n"
"movdqu (%1),%%xmm1 \n" // Load 8 UV "movdqu (%1),%%xmm1 \n" // Load 8 UV
"lea (%1,%5),%1 \n" "lea (%1,%5),%1 \n"
"movdqu %%xmm0,%%xmm2 \n" "movdqu %%xmm0,%%xmm2 \n"
"punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm1,%%xmm0 \n"
"punpckhbw %%xmm1,%%xmm2 \n" "punpckhbw %%xmm1,%%xmm2 \n"
"movdqu %%xmm0,(%2) \n" "movdqu %%xmm0,(%2) \n"
"movdqu %%xmm2,0x10(%2) \n" "movdqu %%xmm2,0x10(%2) \n"
"lea 0x20(%2),%2 \n" "lea 0x20(%2),%2 \n"
"jg 1b \n" "jg 1b \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(src_uv), // %1 "+r"(src_uv), // %1
"+r"(dst_yuy2), // %2 "+r"(dst_yuy2), // %2

View File

@ -625,20 +625,20 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
#if LIBYUV_USE_ST2 #if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUYV. // Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y, void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile( asm volatile(
"1: \n" "1: \n"
"vld1.8 q0, [%0], %4 \n" // Load 16 Y "vld1.8 q0, [%0], %4 \n" // Load 16 Y
"pld [%0, 1792] \n" "pld [%0, 1792] \n"
"vld1.8 q1, [%1], %5 \n" // Load 8 UV "vld1.8 q1, [%1], %5 \n" // Load 8 UV
"pld [%1, 1792] \n" "pld [%1, 1792] \n"
"subs %3, %3, #16 \n" "subs %3, %3, #16 \n"
"vst2.8 {q0, q1}, [%2]! \n" "vst2.8 {q0, q1}, [%2]! \n"
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(src_uv), // %1 "+r"(src_uv), // %1
"+r"(dst_yuy2), // %2 "+r"(dst_yuy2), // %2
@ -651,21 +651,21 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
#else #else
// Read 16 Y, 8 UV, and write 8 YUYV. // Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y, void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile( asm volatile(
"1: \n" "1: \n"
"vld1.8 q0, [%0], %4 \n" // Load 16 Y "vld1.8 q0, [%0], %4 \n" // Load 16 Y
"vld1.8 q1, [%1], %5 \n" // Load 8 UV "vld1.8 q1, [%1], %5 \n" // Load 8 UV
"subs %3, %3, #16 \n" "subs %3, %3, #16 \n"
"pld [%0, 1792] \n" "pld [%0, 1792] \n"
"vzip.8 q0, q1 \n" "vzip.8 q0, q1 \n"
"pld [%1, 1792] \n" "pld [%1, 1792] \n"
"vst1.8 {q0, q1}, [%2]! \n" "vst1.8 {q0, q1}, [%2]! \n"
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(src_uv), // %1 "+r"(src_uv), // %1
"+r"(dst_yuy2), // %2 "+r"(dst_yuy2), // %2

View File

@ -653,11 +653,11 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
#if LIBYUV_USE_ST2 #if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUY2 // Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y, void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile( asm volatile(
"1: \n" "1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
@ -667,23 +667,23 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
"subs %w3, %w3, #16 \n" // store 8 YUY2 "subs %w3, %w3, #16 \n" // store 8 YUY2
"st2 {v0.16b,v1.16b}, [%2], #32 \n" "st2 {v0.16b,v1.16b}, [%2], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(src_uv), // %1 "+r"(src_uv), // %1
"+r"(dst_yuy2), // %2 "+r"(dst_yuy2), // %2
"+r"(width) // %3 "+r"(width) // %3
: "r"(src_y_tile_stride), // %4 : "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5 "r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1" // Clobber list : "cc", "memory", "v0", "v1" // Clobber list
); );
} }
#else #else
// Read 16 Y, 8 UV, and write 8 YUY2 // Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y, void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile( asm volatile(
"1: \n" "1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys "ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
@ -694,13 +694,13 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
"prfm pldl1keep, [%1, 1792] \n" "prfm pldl1keep, [%1, 1792] \n"
"zip2 v3.16b, v0.16b, v1.16b \n" "zip2 v3.16b, v0.16b, v1.16b \n"
"st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2 "st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src_y), // %0 : "+r"(src_y), // %0
"+r"(src_uv), // %1 "+r"(src_uv), // %1
"+r"(dst_yuy2), // %2 "+r"(dst_yuy2), // %2
"+r"(width) // %3 "+r"(width) // %3
: "r"(src_y_tile_stride), // %4 : "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5 "r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list
); );
} }

View File

@ -1315,11 +1315,11 @@ void ScalePlaneBilinearUp(int src_width,
const uint8_t* src = src_ptr + yi * (int64_t)src_stride; const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31; const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row; uint8_t* rowptr = row;
int rowstride = kRowSize; int rowstride = row_size;
int lasty = yi; int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx); ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1766,11 +1766,11 @@ void ScalePlaneBilinearUp_16(int src_width,
const uint16_t* src = src_ptr + yi * (int64_t)src_stride; const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers. // Allocate 2 row buffers.
const int kRowSize = (dst_width + 31) & ~31; const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4); align_buffer_64(row, row_size * 4);
uint16_t* rowptr = (uint16_t*)row; uint16_t* rowptr = (uint16_t*)row;
int rowstride = kRowSize; int rowstride = row_size;
int lasty = yi; int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx); ScaleFilterCols(rowptr, src, dst_width, x, dx);

View File

@ -155,8 +155,8 @@ static void ScaleARGBDown4Box(int src_width,
int dy) { int dy) {
int j; int j;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; const int row_size = (dst_width * 2 * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16); int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) = uint8_t* dst_argb, int dst_width) =
@ -187,9 +187,9 @@ static void ScaleARGBDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + row_size,
dst_width * 2); dst_width * 2);
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); ScaleARGBRowDown2(row, row_size, dst_argb, dst_width);
src_argb += row_stride; src_argb += row_stride;
dst_argb += dst_stride; dst_argb += dst_stride;
} }
@ -548,11 +548,11 @@ static void ScaleARGBBilinearUp(int src_width,
const uint8_t* src = src_argb + yi * (int64_t)src_stride; const uint8_t* src = src_argb + yi * (int64_t)src_stride;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31; const int row_size = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row; uint8_t* rowptr = row;
int rowstride = kRowSize; int rowstride = row_size;
int lasty = yi; int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -798,14 +798,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 31) & ~31; const int row_size = (dst_width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
// Allocate 1 row of ARGB for source conversion. // Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4); align_buffer_64(argb_row, src_width * 4);
uint8_t* rowptr = row; uint8_t* rowptr = row;
int rowstride = kRowSize; int rowstride = row_size;
int lasty = yi; int lasty = yi;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB. // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.

View File

@ -193,8 +193,8 @@ static void ScaleUVDown4Box(int src_width,
int dy) { int dy) {
int j; int j;
// Allocate 2 rows of UV. // Allocate 2 rows of UV.
const int kRowSize = (dst_width * 2 * 2 + 15) & ~15; const int row_size = (dst_width * 2 * 2 + 15) & ~15;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
int row_stride = src_stride * (dy >> 16); int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) = uint8_t* dst_uv, int dst_width) =
@ -234,9 +234,9 @@ static void ScaleUVDown4Box(int src_width,
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2); ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize, ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + row_size,
dst_width * 2); dst_width * 2);
ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width); ScaleUVRowDown2(row, row_size, dst_uv, dst_width);
src_uv += row_stride; src_uv += row_stride;
dst_uv += dst_stride; dst_uv += dst_stride;
} }
@ -574,11 +574,11 @@ static void ScaleUVBilinearUp(int src_width,
const uint8_t* src = src_uv + yi * (int64_t)src_stride; const uint8_t* src = src_uv + yi * (int64_t)src_stride;
// Allocate 2 rows of UV. // Allocate 2 rows of UV.
const int kRowSize = (dst_width * 2 + 15) & ~15; const int row_size = (dst_width * 2 + 15) & ~15;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, row_size * 2);
uint8_t* rowptr = row; uint8_t* rowptr = row;
int rowstride = kRowSize; int rowstride = row_size;
int lasty = yi; int lasty = yi;
ScaleUVFilterCols(rowptr, src, dst_width, x, dx); ScaleUVFilterCols(rowptr, src, dst_width, x, dx);

View File

@ -680,6 +680,9 @@ TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
#define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \ #define I422ToARGBFilter(a, b, c, d, e, f, g, h, i, j) \
I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \ I422ToARGBMatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
kFilterBilinear) kFilterBilinear)
#define I420ToRGB24Filter(a, b, c, d, e, f, g, h, i, j) \
I420ToRGB24MatrixFilter(a, b, c, d, e, f, g, h, &kYuvI601Constants, i, j, \
kFilterBilinear)
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
@ -816,6 +819,7 @@ TESTPLANARTOB(H420, 2, 2, AB30, 4, 4, 1)
#endif #endif
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, RGB24Filter, 3, 3, 1)
#else #else
TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1)
@ -832,13 +836,13 @@ TESTPLANARTOB(I422, 2, 1, RGB565, 2, 2, 1)
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1) TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, UYVY, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1) TESTPLANARTOB(I420, 2, 2, YUY2, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGB, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, BGRA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, RGBA, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1) TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
TESTPLANARTOB(I420, 2, 2, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1) TESTPLANARTOB(I422, 2, 1, ARGBFilter, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
@ -1412,7 +1416,7 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
EPP_B, STRIDE_B, HEIGHT_B) EPP_B, STRIDE_B, HEIGHT_B)
#else #else
#define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \ #define TESTATOB(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, INPLACE) \ EPP_B, STRIDE_B, HEIGHT_B) \
TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \ TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, EPP_B, \
STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0) STRIDE_B, HEIGHT_B, benchmark_width_, _Opt, +, 0)
#endif #endif

View File

@ -42,9 +42,9 @@ static __inline uint32_t Abs(int32_t v) {
} }
// Parse PYUV format. ie name.1920x800_24Hz_P420.yuv // Parse PYUV format. ie name.1920x800_24Hz_P420.yuv
bool ExtractResolutionFromFilename(const char* name, static bool ExtractResolutionFromFilename(const char* name,
int* width_ptr, int* width_ptr,
int* height_ptr) { int* height_ptr) {
// Isolate the .width_height. section of the filename by searching for a // Isolate the .width_height. section of the filename by searching for a
// dot or underscore followed by a digit. // dot or underscore followed by a digit.
for (int i = 0; name[i]; ++i) { for (int i = 0; name[i]; ++i) {
@ -59,7 +59,7 @@ bool ExtractResolutionFromFilename(const char* name,
return false; return false;
} }
void PrintHelp(const char* program) { static void PrintHelp(const char* program) {
printf("%s [-options] src_argb.raw dst_yuv.raw\n", program); printf("%s [-options] src_argb.raw dst_yuv.raw\n", program);
printf( printf(
" -s <width> <height> .... specify source resolution. " " -s <width> <height> .... specify source resolution. "
@ -78,7 +78,7 @@ void PrintHelp(const char* program) {
exit(0); exit(0);
} }
void ParseOptions(int argc, const char* argv[]) { static void ParseOptions(int argc, const char* argv[]) {
if (argc <= 1) { if (argc <= 1) {
PrintHelp(argv[0]); PrintHelp(argv[0]);
} }