mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-06-15 08:26:06 +08:00
BGRAToI420 use BgraConstants for a direct conversion using AVX512BW
Bug: 42280902 Change-Id: I52cb2b0cacea8f2f0b138ec3cc521185dbef8595
This commit is contained in:
parent
f722313c74
commit
51aa1f5ce2
44
GEMINI.md
44
GEMINI.md
@ -1,44 +1,62 @@
|
|||||||
# Gemini Project Context: libyuv Row Functions
|
# Gemini Project Context: libyuv Row Functions
|
||||||
|
|
||||||
This file provides context for the core row-processing architecture of libyuv. Use these guidelines when refactoring, reviewing, or generating code within the `row_*.cc` files.
|
This file provides context for the core row-processing architecture of
|
||||||
|
libyuv. Use these guidelines when refactoring, reviewing, or generating
|
||||||
|
code within the `row_*.cc` files.
|
||||||
|
|
||||||
## Architectural Overview
|
## Architectural Overview
|
||||||
|
|
||||||
Libyuv uses a dispatch system where high-level conversion functions call optimized "Row" functions. These functions are categorized by SIMD architecture and compiler compatibility.
|
Libyuv uses a dispatch system where high-level conversion functions call
|
||||||
|
optimized "Row" functions. These functions are categorized by SIMD architecture
|
||||||
|
and compiler compatibility.
|
||||||
|
|
||||||
## Source File Map
|
## Source File Map
|
||||||
|
|
||||||
### x86 Architectures (32-bit and 64-bit)
|
### x86 Architectures (32-bit and 64-bit)
|
||||||
|
|
||||||
* **row_gcc.cc**: **Master copy.** Contains inline assembly in GCC syntax for GCC and Clang. Supports AVX and AVX512. AVX512 implementations are strictly for 64-bit targets.
|
* **row_gcc.cc**: **Master copy.** Contains inline assembly in GCC syntax for
|
||||||
* **row_win.cc**: Derivative of `row_gcc.cc`. Contains C++ intrinsics specifically for Visual C++ (MSVC). Can be tested with Clang using `-DLIBYUV_ENABLE_ROWWIN`.
|
GCC and Clang. Supports AVX and AVX512. AVX512 implementations are strictly
|
||||||
|
for 64-bit targets.
|
||||||
|
* **row_win.cc**: Derivative of `row_gcc.cc`. Contains C++ intrinsics
|
||||||
|
specifically for Visual C++ (MSVC). Can be tested with Clang using
|
||||||
|
`-DLIBYUV_ENABLE_ROWWIN`.
|
||||||
* **Note**: Use either `row_gcc` or `row_win`, never both.
|
* **Note**: Use either `row_gcc` or `row_win`, never both.
|
||||||
|
|
||||||
### ARM Architectures
|
### ARM Architectures
|
||||||
|
|
||||||
* **row_neon.cc**: 32-bit ARM. Written entirely in inline assembly for GCC/Clang.
|
* **row_neon.cc**: 32-bit ARM. Written entirely in inline assembly for
|
||||||
* **row_neon64.cc**: 64-bit ARM (AArch64). Written entirely in inline assembly for GCC/Clang.
|
GCC/Clang.
|
||||||
|
* **row_neon64.cc**: 64-bit ARM (AArch64). Written entirely in inline assembly
|
||||||
|
for GCC/Clang.
|
||||||
* **row_sve.cc**: ARMv9 Scalable Vector Extensions (SVE).
|
* **row_sve.cc**: ARMv9 Scalable Vector Extensions (SVE).
|
||||||
* **row_sme.cc**: ARMv9 Scalable Matrix Extension (SME) and Streaming SVE (SSVE).
|
* **row_sme.cc**: ARMv9 Scalable Matrix Extension (SME) and Streaming SVE
|
||||||
|
(SSVE).
|
||||||
|
|
||||||
### Other Architectures
|
### Other Architectures
|
||||||
|
|
||||||
* **row_rvv.cc**: RISC-V Vector (RVV). Implemented using intrinsics. Optimized for SiFive X280.
|
* **row_rvv.cc**: RISC-V Vector (RVV). Implemented using intrinsics. Optimized
|
||||||
|
for SiFive X280.
|
||||||
* **row_lsx.cc / row_lasx.cc**: LoongArch MIPS-like extensions.
|
* **row_lsx.cc / row_lasx.cc**: LoongArch MIPS-like extensions.
|
||||||
|
|
||||||
### Utility and Fallbacks
|
### Utility and Fallbacks
|
||||||
|
|
||||||
* **row_common.cc**: Portable C/C++ versions. This is the reference implementation.
|
* **row_common.cc**: Portable C/C++ versions. This is the reference
|
||||||
* **row_any.cc**: Handles "remainder" pixels for widths not multiples of SIMD register size. Used for x86, NEON, and MIPS. Not required for SVE, SME, or RVV due to hardware-level masking.
|
implementation.
|
||||||
|
* **row_any.cc**: Handles "remainder" pixels for widths not multiples of SIMD
|
||||||
|
register size. Used for x86, NEON, and MIPS. Not required for SVE, SME, or
|
||||||
|
RVV due to hardware-level masking.
|
||||||
|
|
||||||
## Coding Guidelines
|
## Coding Guidelines
|
||||||
|
|
||||||
1. **AVX512 Logic**: AVX512 row functions are strictly enabled for **64-bit x86 only**.
|
1. **AVX512 Logic**: AVX512 row functions are strictly enabled for **64-bit x86
|
||||||
2. **Feature Macros**: Use the `HAS_` macros in `include/libyuv/row.h` to enable or disable specific AVX512 versions.
|
only**.
|
||||||
|
2. **Feature Macros**: Use the `HAS_` macros in `include/libyuv/row.h` to
|
||||||
|
enable or disable specific AVX512 versions.
|
||||||
|
|
||||||
## Changelist (CL) & Commit Guidelines
|
## Changelist (CL) & Commit Guidelines
|
||||||
|
|
||||||
When generating descriptions, follow the Chromium/Google standard format. Wrap commit message text at 72 characters.
|
When generating descriptions, follow the Chromium/Google standard format. Wrap
|
||||||
|
commit message text at 72 characters.
|
||||||
|
|
||||||
### Format Example:
|
### Format Example:
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1946
|
Version: 1947
|
||||||
Revision: DEPS
|
Revision: DEPS
|
||||||
License: BSD-3-Clause
|
License: BSD-3-Clause
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|||||||
@ -890,42 +890,21 @@ int ABGRToI420(const uint8_t* src_abgr,
|
|||||||
|
|
||||||
// BGRA little endian (argb in memory) to I422.
|
// BGRA little endian (argb in memory) to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int BGRAToI422(const uint8_t* src_bgra,
|
int BGRAToI422(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_y,
|
||||||
int src_stride_bgra,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height);
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height);
|
|
||||||
|
|
||||||
// ABGR little endian (rgba in memory) to I422.
|
// ABGR little endian (rgba in memory) to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToI422(const uint8_t* src_abgr,
|
int ABGRToI422(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height);
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height);
|
|
||||||
|
|
||||||
// RGBA little endian (abgr in memory) to I422.
|
// RGBA little endian (abgr in memory) to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGBAToI422(const uint8_t* src_rgba,
|
int RGBAToI422(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_y,
|
||||||
int src_stride_rgba,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height);
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height);
|
|
||||||
|
|
||||||
// RGBA little endian (abgr in memory) to I420.
|
// RGBA little endian (abgr in memory) to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -247,16 +247,9 @@ int ARGBToI422(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// Convert ABGR To I422.
|
// Convert ABGR To I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToI422(const uint8_t* src_abgr,
|
int ABGRToI422(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height);
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height);
|
|
||||||
|
|
||||||
// RGB to I444 with matrix. See ArgbConstants at the top of this file for usage.
|
// RGB to I444 with matrix. See ArgbConstants at the top of this file for usage.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -352,9 +352,11 @@ extern "C" {
|
|||||||
#define HAS_RGB565TOARGBROW_AVX2
|
#define HAS_RGB565TOARGBROW_AVX2
|
||||||
#define HAS_ARGB1555TOARGBROW_AVX2
|
#define HAS_ARGB1555TOARGBROW_AVX2
|
||||||
#define HAS_ARGB4444TOARGBROW_AVX2
|
#define HAS_ARGB4444TOARGBROW_AVX2
|
||||||
|
#define HAS_ARGBSHUFFLEROW_AVX2
|
||||||
#if defined(__x86_64__) || defined(_M_X64)
|
#if defined(__x86_64__) || defined(_M_X64)
|
||||||
#define HAS_RAWTOARGBROW_AVX512BW
|
#define HAS_RAWTOARGBROW_AVX512BW
|
||||||
#define HAS_RGB24TOARGBROW_AVX512BW
|
#define HAS_RGB24TOARGBROW_AVX512BW
|
||||||
|
#define HAS_ARGBSHUFFLEROW_AVX512BW
|
||||||
#endif
|
#endif
|
||||||
#define HAS_ARGBTOYROW_AVX2
|
#define HAS_ARGBTOYROW_AVX2
|
||||||
#define HAS_ARGBTOYMATRIXROW_AVX2
|
#define HAS_ARGBTOYMATRIXROW_AVX2
|
||||||
@ -383,7 +385,6 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
#define HAS_ARGBTORGB24ROW_AVX512VBMI
|
#define HAS_ARGBTORGB24ROW_AVX512VBMI
|
||||||
#define HAS_CONVERT16TO8ROW_AVX512BW
|
#define HAS_CONVERT16TO8ROW_AVX512BW
|
||||||
#define HAS_MERGEUVROW_AVX512BW
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available for AVX512 clang x64 platforms:
|
// The following are available for AVX512 clang x64 platforms:
|
||||||
@ -401,6 +402,11 @@ extern "C" {
|
|||||||
#define HAS_ARGBTOUVJROW_AVX512BW
|
#define HAS_ARGBTOUVJROW_AVX512BW
|
||||||
#define HAS_ARGBTOUVMATRIXROW_AVX512BW
|
#define HAS_ARGBTOUVMATRIXROW_AVX512BW
|
||||||
#define HAS_J400TOARGBROW_AVX512BW
|
#define HAS_J400TOARGBROW_AVX512BW
|
||||||
|
#define HAS_MERGEUVROW_AVX512BW
|
||||||
|
#define HAS_MIRRORROW_AVX512BW
|
||||||
|
#define HAS_MIRRORSPLITUVROW_AVX512BW
|
||||||
|
#define HAS_SPLITUVROW_AVX512BW
|
||||||
|
#define HAS_RGBTOUVMATRIXROW_AVX512BW
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The following are available on Neon platforms:
|
// The following are available on Neon platforms:
|
||||||
@ -2180,29 +2186,27 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGBToYMatrixRow_C(const uint8_t* src_rgb,
|
void RGBToYMatrixRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGBToUVMatrixRow_C(const uint8_t* src_rgb,
|
void RGBToUVMatrixRow_C(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
int src_stride_rgb,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565,
|
void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
int width, const struct ArgbConstants* c);
|
||||||
void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555,
|
||||||
void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565,
|
uint8_t* dst_v, int width,
|
||||||
int src_stride_rgb565,
|
const struct ArgbConstants* c);
|
||||||
uint8_t* dst_u,
|
void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
uint8_t* dst_v,
|
int width, const struct ArgbConstants* c);
|
||||||
int width,
|
void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444,
|
||||||
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
@ -2210,8 +2214,18 @@ void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
void RGBToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGBToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGBToUVMatrixRow_AVX512BW(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGBToUVMatrixRow_Any_AVX512BW(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -2233,86 +2247,80 @@ void ARGBToYMatrixRow_SSSE3(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb,
|
void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGBToYMatrixRow_Any_AVX2(const uint8_t* src_rgb,
|
void RGBToYMatrixRow_Any_AVX2(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565,
|
void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c);
|
||||||
int width,
|
void RGB565ToYMatrixRow_Any_AVX2(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
const struct ArgbConstants* c);
|
int width, const struct ArgbConstants* c);
|
||||||
void RGB565ToYMatrixRow_Any_AVX2(const uint8_t* src_rgb565,
|
void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c);
|
||||||
int width,
|
void ARGB1555ToYMatrixRow_Any_AVX2(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
const struct ArgbConstants* c);
|
int width, const struct ArgbConstants* c);
|
||||||
void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555,
|
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToYMatrixRow_Any_AVX2(const uint8_t* src_argb1555,
|
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
|
void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
|
||||||
int src_stride_argb1555,
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void ARGB1555ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb1555,
|
void ARGB1555ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb1555,
|
||||||
int src_stride_argb1555,
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444,
|
void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c);
|
||||||
int width,
|
void ARGB4444ToYMatrixRow_Any_AVX2(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
const struct ArgbConstants* c);
|
int width, const struct ArgbConstants* c);
|
||||||
void ARGB4444ToYMatrixRow_Any_AVX2(const uint8_t* src_argb4444,
|
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
|
||||||
void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
|
void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
|
||||||
int src_stride_argb4444,
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void ARGB4444ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb4444,
|
void ARGB4444ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb4444,
|
||||||
int src_stride_argb4444,
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565,
|
void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
int src_stride_rgb565,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGB565ToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb565,
|
void RGB565ToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb565,
|
||||||
int src_stride_rgb565,
|
int src_stride_rgb565, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
const struct ArgbConstants* c);
|
||||||
int width,
|
void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555,
|
||||||
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444,
|
||||||
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGB565ToYMatrixRow_Any_NEON(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB1555ToYMatrixRow_Any_NEON(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB1555ToUVMatrixRow_Any_NEON(const uint8_t* src_argb1555,
|
||||||
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void ARGB4444ToYMatrixRow_Any_NEON(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c);
|
||||||
|
void ARGB4444ToUVMatrixRow_Any_NEON(const uint8_t* src_argb4444,
|
||||||
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGB565ToUVMatrixRow_Any_NEON(const uint8_t* src_rgb565,
|
||||||
|
int src_stride_rgb565, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
void RGB565ToYMatrixRow_Any_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToYMatrixRow_Any_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB1555ToUVMatrixRow_Any_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB4444ToYMatrixRow_Any_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
|
||||||
void ARGB4444ToUVMatrixRow_Any_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
void RGB565ToUVMatrixRow_Any_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
|
||||||
|
|
||||||
void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb,
|
void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
@ -2340,9 +2348,14 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb,
|
|||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
|
|
||||||
void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
void RGBToYMatrixRow_Any_NEON(const uint8_t* src_rgb, uint8_t* dst_y, int width, const struct ArgbConstants* c);
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
void RGBToUVMatrixRow_Any_NEON(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c);
|
const struct ArgbConstants* c);
|
||||||
|
void RGBToYMatrixRow_Any_NEON(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
void RGBToUVMatrixRow_Any_NEON(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c);
|
||||||
|
|
||||||
void ARGBToYMatrixRow_NEON_DotProd(const uint8_t* src_argb,
|
void ARGBToYMatrixRow_NEON_DotProd(const uint8_t* src_argb,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
@ -3040,12 +3053,15 @@ void ARGBToUVJ444Row_C(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
|
void MirrorRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
|
||||||
|
void MirrorRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
|
int width);
|
||||||
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
@ -3063,9 +3079,9 @@ void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
|||||||
void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
|
|
||||||
void MirrorSplitUVRow_AVX2(const uint8_t* src,
|
void MirrorSplitUVRow_AVX512BW(const uint8_t* src, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width);
|
||||||
uint8_t* dst_v,
|
void MirrorSplitUVRow_AVX2(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void MirrorSplitUVRow_NEON(const uint8_t* src_uv,
|
void MirrorSplitUVRow_NEON(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -3102,15 +3118,13 @@ void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
|
||||||
uint8_t* dst_rgb24,
|
|
||||||
int width);
|
int width);
|
||||||
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24,
|
void RGB24MirrorRow_NEON(const uint8_t* src_rgb24,
|
||||||
uint8_t* dst_rgb24,
|
uint8_t* dst_rgb24,
|
||||||
int width);
|
int width);
|
||||||
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width);
|
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width);
|
||||||
void RGB24MirrorRow_Any_AVX2(const uint8_t* src_ptr,
|
void RGB24MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
uint8_t* dst_ptr,
|
|
||||||
int width);
|
int width);
|
||||||
void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr,
|
void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -3124,6 +3138,8 @@ void SplitUVRow_SSE2(const uint8_t* src_uv,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
|
void SplitUVRow_AVX512BW(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
|
||||||
|
int width);
|
||||||
void SplitUVRow_AVX2(const uint8_t* src_uv,
|
void SplitUVRow_AVX2(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -3140,6 +3156,8 @@ void SplitUVRow_RVV(const uint8_t* src_uv,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
|
void SplitUVRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width);
|
||||||
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
|
void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -4114,10 +4132,8 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const uint8_t* shuffler,
|
const uint8_t* shuffler,
|
||||||
int width);
|
int width);
|
||||||
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb,
|
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
uint8_t* dst_argb,
|
const uint8_t* shuffler, int width);
|
||||||
const uint8_t* shuffler,
|
|
||||||
int width);
|
|
||||||
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
|
void ARGBShuffleRow_NEON(const uint8_t* src_argb,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const uint8_t* shuffler,
|
const uint8_t* shuffler,
|
||||||
@ -4138,10 +4154,8 @@ void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const uint8_t* param,
|
const uint8_t* param,
|
||||||
int width);
|
int width);
|
||||||
void ARGBShuffleRow_Any_AVX512BW(const uint8_t* src_ptr,
|
void ARGBShuffleRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
uint8_t* dst_ptr,
|
const uint8_t* param, int width);
|
||||||
const uint8_t* param,
|
|
||||||
int width);
|
|
||||||
void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
|
void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const uint8_t* param,
|
const uint8_t* param,
|
||||||
@ -4160,7 +4174,8 @@ void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
|
|||||||
int width);
|
int width);
|
||||||
void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
|
void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
|
||||||
void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||||
@ -4250,20 +4265,14 @@ void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
|
|||||||
void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
|
void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
void RAWToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
void RAWToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||||
uint8_t* dst_ptr,
|
void RGB24ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
int width);
|
|
||||||
void RGB24ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
int width);
|
int width);
|
||||||
void RAWToARGBRow_Any_AVX512BW(const uint8_t* src_ptr,
|
void RAWToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
uint8_t* dst_ptr,
|
|
||||||
int width);
|
int width);
|
||||||
void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24,
|
void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, uint8_t* dst_argb,
|
||||||
uint8_t* dst_argb,
|
|
||||||
int width);
|
int width);
|
||||||
void RGB24ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr,
|
void RGB24ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
uint8_t* dst_ptr,
|
|
||||||
int width);
|
int width);
|
||||||
void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr,
|
void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -4272,7 +4281,6 @@ void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
|
|
||||||
void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int width);
|
int width);
|
||||||
@ -4512,8 +4520,7 @@ void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
|||||||
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void J400ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr,
|
void J400ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr,
|
||||||
uint8_t* dst_ptr,
|
|
||||||
int width);
|
int width);
|
||||||
void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
@ -4729,12 +4736,9 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
void I444ToARGBRow_AVX512BW(const uint8_t* y_buf,
|
void I444ToARGBRow_AVX512BW(const uint8_t* y_buf, const uint8_t* u_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* v_buf, uint8_t* dst_argb,
|
||||||
const uint8_t* v_buf,
|
const struct YuvConstants* yuvconstants, int width);
|
||||||
uint8_t* dst_argb,
|
|
||||||
const struct YuvConstants* yuvconstants,
|
|
||||||
int width);
|
|
||||||
void I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
void I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* u_buf,
|
||||||
const uint8_t* v_buf,
|
const uint8_t* v_buf,
|
||||||
@ -5057,10 +5061,8 @@ void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
void I444ToARGBRow_Any_AVX512BW(const uint8_t* y_buf,
|
void I444ToARGBRow_Any_AVX512BW(const uint8_t* y_buf, const uint8_t* u_buf,
|
||||||
const uint8_t* u_buf,
|
const uint8_t* v_buf, uint8_t* dst_ptr,
|
||||||
const uint8_t* v_buf,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width);
|
int width);
|
||||||
void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
|
void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
|
||||||
@ -6712,15 +6714,11 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
|
|||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int width,
|
int width,
|
||||||
int source_y_fraction);
|
int source_y_fraction);
|
||||||
void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
void InterpolateRow_16_AVX2(uint16_t* dst_ptr, const uint16_t* src_ptr,
|
||||||
const uint16_t* src_ptr,
|
ptrdiff_t src_stride, int width,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
int width,
|
|
||||||
int source_y_fraction);
|
int source_y_fraction);
|
||||||
void InterpolateRow_16_Any_AVX2(uint16_t* dst_ptr,
|
void InterpolateRow_16_Any_AVX2(uint16_t* dst_ptr, const uint16_t* src_ptr,
|
||||||
const uint16_t* src_ptr,
|
ptrdiff_t src_stride, int width,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
int width,
|
|
||||||
int source_y_fraction);
|
int source_y_fraction);
|
||||||
void InterpolateRow_16_NEON(uint16_t* dst_ptr,
|
void InterpolateRow_16_NEON(uint16_t* dst_ptr,
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1946
|
#define LIBYUV_VERSION 1947
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -13,12 +13,11 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
#include "libyuv/basic_types.h"
|
#include "libyuv/basic_types.h"
|
||||||
|
#include "libyuv/convert_from_argb.h"
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
#include "libyuv/convert_from_argb.h"
|
|
||||||
#include "libyuv/rotate.h"
|
#include "libyuv/rotate.h"
|
||||||
#include "libyuv/row.h"
|
#include "libyuv/row.h"
|
||||||
|
|
||||||
#include "libyuv/scale.h" // For ScalePlane()
|
#include "libyuv/scale.h" // For ScalePlane()
|
||||||
#include "libyuv/scale_row.h" // For FixedDiv
|
#include "libyuv/scale_row.h" // For FixedDiv
|
||||||
#include "libyuv/scale_uv.h" // For UVScale()
|
#include "libyuv/scale_uv.h" // For UVScale()
|
||||||
@ -948,8 +947,7 @@ int I422ToNV21(const uint8_t* src_y,
|
|||||||
|
|
||||||
// Allocate u and v buffers
|
// Allocate u and v buffers
|
||||||
const uint64_t plane_size = (uint64_t)halfwidth * halfheight;
|
const uint64_t plane_size = (uint64_t)halfwidth * halfheight;
|
||||||
if (plane_size > SIZE_MAX / 2)
|
if (plane_size > SIZE_MAX / 2) return 1;
|
||||||
return 1;
|
|
||||||
align_buffer_64(plane_u, (size_t)plane_size * 2);
|
align_buffer_64(plane_u, (size_t)plane_size * 2);
|
||||||
if (!plane_u)
|
if (!plane_u)
|
||||||
return 1;
|
return 1;
|
||||||
@ -2034,8 +2032,8 @@ int ARGBToI420(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
@ -2121,34 +2119,34 @@ ARGBToUVMatrixRow_C;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
||||||
if (TestCpuFlag(kCpuHasSVE2)) {
|
if (TestCpuFlag(kCpuHasSVE2)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
||||||
if (TestCpuFlag(kCpuHasSME)) {
|
if (TestCpuFlag(kCpuHasSME)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -2207,18 +2205,10 @@ ARGBToUVMatrixRow_C;
|
|||||||
// Convert ARGB to I420 with Alpha
|
// Convert ARGB to I420 with Alpha
|
||||||
// The following version calls ARGBExtractAlpha on the full image.
|
// The following version calls ARGBExtractAlpha on the full image.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToI420Alpha(const uint8_t* src_argb,
|
int ARGBToI420Alpha(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
|
||||||
uint8_t* dst_y,
|
int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
|
||||||
int dst_stride_y,
|
uint8_t* dst_a, int dst_stride_a, int width, int height) {
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
uint8_t* dst_a,
|
|
||||||
int dst_stride_a,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, width, height);
|
dst_stride_u, dst_v, dst_stride_v, width, height);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
@ -2230,18 +2220,10 @@ int ARGBToI420Alpha(const uint8_t* src_argb,
|
|||||||
#else // USE_EXTRACTALPHA
|
#else // USE_EXTRACTALPHA
|
||||||
// Convert ARGB to I420 with Alpha
|
// Convert ARGB to I420 with Alpha
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToI420Alpha(const uint8_t* src_argb,
|
int ARGBToI420Alpha(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
|
||||||
uint8_t* dst_y,
|
int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
|
||||||
int dst_stride_y,
|
uint8_t* dst_a, int dst_stride_a, int width, int height) {
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
uint8_t* dst_a,
|
|
||||||
int dst_stride_a,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
@ -2428,104 +2410,62 @@ int ARGBToI420Alpha(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// Convert BGRA to I420.
|
// Convert BGRA to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int BGRAToI420(const uint8_t* src_bgra,
|
int BGRAToI420(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_y,
|
||||||
int src_stride_bgra,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI420Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kBgraI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kBgraI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert BGRA to I422.
|
// Convert BGRA to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int BGRAToI422(const uint8_t* src_bgra,
|
int BGRAToI422(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_y,
|
||||||
int src_stride_bgra,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI422Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kBgraI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kBgraI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ABGR to I422.
|
// Convert ABGR to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToI422(const uint8_t* src_abgr,
|
int ABGRToI422(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kAbgrI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kAbgrI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert RGBA to I422.
|
// Convert RGBA to I422.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGBAToI422(const uint8_t* src_rgba,
|
int RGBAToI422(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_y,
|
||||||
int src_stride_rgba,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI422Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kRgbaI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kRgbaI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ABGR to I420.
|
// Convert ABGR to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToI420(const uint8_t* src_abgr,
|
int ABGRToI420(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kAbgrI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kAbgrI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert RGBA to I420.
|
// Convert RGBA to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGBAToI420(const uint8_t* src_rgba,
|
int RGBAToI420(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_y,
|
||||||
int src_stride_rgba,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI420Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kRgbaI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kRgbaI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enabled if 1 pass is available
|
// Enabled if 1 pass is available
|
||||||
@ -2536,16 +2476,9 @@ int RGBAToI420(const uint8_t* src_rgba,
|
|||||||
|
|
||||||
// Convert RGB24 to I420.
|
// Convert RGB24 to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGB24ToI420(const uint8_t* src_rgb24,
|
int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y,
|
||||||
int src_stride_rgb24,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb,
|
void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
@ -2569,6 +2502,14 @@ int RGB24ToI420(const uint8_t* src_rgb24,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_RGBTOUVMATRIXROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
RGBToUVMatrixRow = RGBToUVMatrixRow_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_RGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_RGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON;
|
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON;
|
||||||
@ -2603,9 +2544,11 @@ int RGB24ToI420(const uint8_t* src_rgb24,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, &kArgbI601Constants);
|
RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants);
|
RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants);
|
||||||
RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, &kArgbI601Constants);
|
RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
src_rgb24 += src_stride_rgb24 * 2;
|
src_rgb24 += src_stride_rgb24 * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_u += dst_stride_u;
|
dst_u += dst_stride_u;
|
||||||
@ -2626,16 +2569,9 @@ int RGB24ToI420(const uint8_t* src_rgb24,
|
|||||||
|
|
||||||
// Convert RGB24 to J420.
|
// Convert RGB24 to J420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGB24ToJ420(const uint8_t* src_rgb24,
|
int RGB24ToJ420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y,
|
||||||
int src_stride_rgb24,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
#if defined(HAS_RGB24TOYJROW)
|
#if defined(HAS_RGB24TOYJROW)
|
||||||
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
|
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
|
||||||
@ -2806,8 +2742,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
|
|||||||
// Allocate 2 rows of ARGB.
|
// Allocate 2 rows of ARGB.
|
||||||
const int row_size = (width * 4 + 31) & ~31;
|
const int row_size = (width * 4 + 31) & ~31;
|
||||||
align_buffer_64(row, row_size * 2);
|
align_buffer_64(row, row_size * 2);
|
||||||
if (!row)
|
if (!row) return 1;
|
||||||
return 1;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
@ -2853,16 +2788,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
|
|||||||
|
|
||||||
// Convert RAW to I420.
|
// Convert RAW to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RAWToI420(const uint8_t* src_rgb24,
|
int RAWToI420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y,
|
||||||
int src_stride_rgb24,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb,
|
void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
@ -2886,6 +2814,14 @@ int RAWToI420(const uint8_t* src_rgb24,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_RGBTOUVMATRIXROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
RGBToUVMatrixRow = RGBToUVMatrixRow_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_RGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_RGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON;
|
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON;
|
||||||
@ -2920,9 +2856,11 @@ int RAWToI420(const uint8_t* src_rgb24,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, &kArgbI601Constants);
|
RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants);
|
RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants);
|
||||||
RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, &kArgbI601Constants);
|
RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
src_rgb24 += src_stride_rgb24 * 2;
|
src_rgb24 += src_stride_rgb24 * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_u += dst_stride_u;
|
dst_u += dst_stride_u;
|
||||||
@ -2943,16 +2881,9 @@ int RAWToI420(const uint8_t* src_rgb24,
|
|||||||
|
|
||||||
// Convert RAW to J420.
|
// Convert RAW to J420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RAWToJ420(const uint8_t* src_raw,
|
int RAWToJ420(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y,
|
||||||
int src_stride_raw,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
#if defined(HAS_RAWTOYJROW)
|
#if defined(HAS_RAWTOYJROW)
|
||||||
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
|
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
|
||||||
@ -3161,16 +3092,9 @@ int RAWToJ420(const uint8_t* src_raw,
|
|||||||
// RAW big endian (rgb in memory) to I444
|
// RAW big endian (rgb in memory) to I444
|
||||||
// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444
|
// 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RAWToI444(const uint8_t* src_raw,
|
int RAWToI444(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y,
|
||||||
int src_stride_raw,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||||
RAWToARGBRow_C;
|
RAWToARGBRow_C;
|
||||||
@ -3370,8 +3294,7 @@ int RAWToI444(const uint8_t* src_raw,
|
|||||||
// Allocate a row of ARGB.
|
// Allocate a row of ARGB.
|
||||||
const int row_size = width * 4;
|
const int row_size = width * 4;
|
||||||
align_buffer_64(row, row_size);
|
align_buffer_64(row, row_size);
|
||||||
if (!row)
|
if (!row) return 1;
|
||||||
return 1;
|
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
RAWToARGBRow(src_raw, row, width);
|
RAWToARGBRow(src_raw, row, width);
|
||||||
@ -3390,16 +3313,9 @@ int RAWToI444(const uint8_t* src_raw,
|
|||||||
// RAW big endian (rgb in memory) to J444
|
// RAW big endian (rgb in memory) to J444
|
||||||
// 2 step conversion of RAWToARGB then ARGBToYJ and ARGBToUVJ444
|
// 2 step conversion of RAWToARGB then ARGBToYJ and ARGBToUVJ444
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RAWToJ444(const uint8_t* src_raw,
|
int RAWToJ444(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y,
|
||||||
int src_stride_raw,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||||
RAWToARGBRow_C;
|
RAWToARGBRow_C;
|
||||||
@ -3590,8 +3506,7 @@ int RAWToJ444(const uint8_t* src_raw,
|
|||||||
// Allocate a row of ARGB.
|
// Allocate a row of ARGB.
|
||||||
const int row_size = width * 4;
|
const int row_size = width * 4;
|
||||||
align_buffer_64(row, row_size);
|
align_buffer_64(row, row_size);
|
||||||
if (!row)
|
if (!row) return 1;
|
||||||
return 1;
|
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
RAWToARGBRow(src_raw, row, width);
|
RAWToARGBRow(src_raw, row, width);
|
||||||
@ -3609,22 +3524,18 @@ int RAWToJ444(const uint8_t* src_raw,
|
|||||||
|
|
||||||
// Convert RGB565 to I420.
|
// Convert RGB565 to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGB565ToI420(const uint8_t* src_rgb565,
|
int RGB565ToI420(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
int src_stride_rgb565,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
|
||||||
uint8_t* dst_y,
|
int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width,
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*RGB565ToUVMatrixRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
|
void (*RGB565ToUVMatrixRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c) = RGB565ToUVMatrixRow_C;
|
const struct ArgbConstants* c) =
|
||||||
void (*RGB565ToYMatrixRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width,
|
RGB565ToUVMatrixRow_C;
|
||||||
const struct ArgbConstants* c) = RGB565ToYMatrixRow_C;
|
void (*RGB565ToYMatrixRow)(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
|
int width, const struct ArgbConstants* c) =
|
||||||
|
RGB565ToYMatrixRow_C;
|
||||||
|
|
||||||
#if defined(HAS_RGB565TOYMATRIXROW_AVX2)
|
#if defined(HAS_RGB565TOYMATRIXROW_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
@ -3671,9 +3582,11 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
RGB565ToUVMatrixRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width, &kArgbI601Constants);
|
RGB565ToUVMatrixRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants);
|
RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants);
|
||||||
RGB565ToYMatrixRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width, &kArgbI601Constants);
|
RGB565ToYMatrixRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y,
|
||||||
|
width, &kArgbI601Constants);
|
||||||
src_rgb565 += src_stride_rgb565 * 2;
|
src_rgb565 += src_stride_rgb565 * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_u += dst_stride_u;
|
dst_u += dst_stride_u;
|
||||||
@ -3681,30 +3594,25 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
if (height & 1) {
|
if (height & 1) {
|
||||||
RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants);
|
RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants);
|
||||||
RGB565ToUVMatrixRow(src_rgb565, 0, dst_u, dst_v, width, &kArgbI601Constants);
|
RGB565ToUVMatrixRow(src_rgb565, 0, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Convert ARGB1555 to I420.
|
// Convert ARGB1555 to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGB1555ToI420(const uint8_t* src_argb1555,
|
int ARGB1555ToI420(const uint8_t* src_argb1555, int src_stride_argb1555,
|
||||||
int src_stride_argb1555,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
|
||||||
uint8_t* dst_y,
|
int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
|
||||||
int dst_stride_y,
|
int width, int height) {
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ARGB1555ToUVMatrixRow)(
|
void (*ARGB1555ToUVMatrixRow)(
|
||||||
const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u,
|
const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_v, int width,
|
uint8_t* dst_v, int width, const struct ArgbConstants* c) =
|
||||||
const struct ArgbConstants* c) = ARGB1555ToUVMatrixRow_C;
|
ARGB1555ToUVMatrixRow_C;
|
||||||
void (*ARGB1555ToYMatrixRow)(
|
void (*ARGB1555ToYMatrixRow)(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
const uint8_t* src_argb1555, uint8_t* dst_y, int width,
|
int width, const struct ArgbConstants* c) =
|
||||||
const struct ArgbConstants* c) = ARGB1555ToYMatrixRow_C;
|
ARGB1555ToYMatrixRow_C;
|
||||||
|
|
||||||
#if defined(HAS_ARGB1555TOYMATRIXROW_AVX2)
|
#if defined(HAS_ARGB1555TOYMATRIXROW_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
@ -3751,9 +3659,11 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
ARGB1555ToUVMatrixRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width, &kArgbI601Constants);
|
ARGB1555ToUVMatrixRow(src_argb1555, src_stride_argb1555, dst_u, dst_v,
|
||||||
|
width, &kArgbI601Constants);
|
||||||
ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants);
|
ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants);
|
||||||
ARGB1555ToYMatrixRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width, &kArgbI601Constants);
|
ARGB1555ToYMatrixRow(src_argb1555 + src_stride_argb1555,
|
||||||
|
dst_y + dst_stride_y, width, &kArgbI601Constants);
|
||||||
src_argb1555 += src_stride_argb1555 * 2;
|
src_argb1555 += src_stride_argb1555 * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_u += dst_stride_u;
|
dst_u += dst_stride_u;
|
||||||
@ -3761,30 +3671,25 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
|||||||
}
|
}
|
||||||
if (height & 1) {
|
if (height & 1) {
|
||||||
ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants);
|
ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants);
|
||||||
ARGB1555ToUVMatrixRow(src_argb1555, 0, dst_u, dst_v, width, &kArgbI601Constants);
|
ARGB1555ToUVMatrixRow(src_argb1555, 0, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Convert ARGB4444 to I420.
|
// Convert ARGB4444 to I420.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGB4444ToI420(const uint8_t* src_argb4444,
|
int ARGB4444ToI420(const uint8_t* src_argb4444, int src_stride_argb4444,
|
||||||
int src_stride_argb4444,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u,
|
||||||
uint8_t* dst_y,
|
int dst_stride_u, uint8_t* dst_v, int dst_stride_v,
|
||||||
int dst_stride_y,
|
int width, int height) {
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ARGB4444ToUVMatrixRow)(
|
void (*ARGB4444ToUVMatrixRow)(
|
||||||
const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u,
|
const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_v, int width,
|
uint8_t* dst_v, int width, const struct ArgbConstants* c) =
|
||||||
const struct ArgbConstants* c) = ARGB4444ToUVMatrixRow_C;
|
ARGB4444ToUVMatrixRow_C;
|
||||||
void (*ARGB4444ToYMatrixRow)(
|
void (*ARGB4444ToYMatrixRow)(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
const uint8_t* src_argb4444, uint8_t* dst_y, int width,
|
int width, const struct ArgbConstants* c) =
|
||||||
const struct ArgbConstants* c) = ARGB4444ToYMatrixRow_C;
|
ARGB4444ToYMatrixRow_C;
|
||||||
|
|
||||||
#if defined(HAS_ARGB4444TOYMATRIXROW_AVX2)
|
#if defined(HAS_ARGB4444TOYMATRIXROW_AVX2)
|
||||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||||
@ -3831,9 +3736,11 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
ARGB4444ToUVMatrixRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width, &kArgbI601Constants);
|
ARGB4444ToUVMatrixRow(src_argb4444, src_stride_argb4444, dst_u, dst_v,
|
||||||
|
width, &kArgbI601Constants);
|
||||||
ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants);
|
ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants);
|
||||||
ARGB4444ToYMatrixRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, width, &kArgbI601Constants);
|
ARGB4444ToYMatrixRow(src_argb4444 + src_stride_argb4444,
|
||||||
|
dst_y + dst_stride_y, width, &kArgbI601Constants);
|
||||||
src_argb4444 += src_stride_argb4444 * 2;
|
src_argb4444 += src_stride_argb4444 * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_u += dst_stride_u;
|
dst_u += dst_stride_u;
|
||||||
@ -3841,18 +3748,15 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
|||||||
}
|
}
|
||||||
if (height & 1) {
|
if (height & 1) {
|
||||||
ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants);
|
ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants);
|
||||||
ARGB4444ToUVMatrixRow(src_argb4444, 0, dst_u, dst_v, width, &kArgbI601Constants);
|
ARGB4444ToUVMatrixRow(src_argb4444, 0, dst_u, dst_v, width,
|
||||||
|
&kArgbI601Constants);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Convert RGB24 to J400.
|
// Convert RGB24 to J400.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int RGB24ToJ400(const uint8_t* src_rgb24,
|
int RGB24ToJ400(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_yj,
|
||||||
int src_stride_rgb24,
|
int dst_stride_yj, int width, int height) {
|
||||||
uint8_t* dst_yj,
|
|
||||||
int dst_stride_yj,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||||
RGB24ToARGBRow_C;
|
RGB24ToARGBRow_C;
|
||||||
@ -3993,7 +3897,7 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
|
|||||||
RGB24ToARGBRow = RGB24ToARGBRow_RVV;
|
RGB24ToARGBRow = RGB24ToARGBRow_RVV;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
// Allocate 1 row of ARGB.
|
// Allocate 1 row of ARGB.
|
||||||
const int row_size = (width * 4 + 31) & ~31;
|
const int row_size = (width * 4 + 31) & ~31;
|
||||||
align_buffer_64(row, row_size);
|
align_buffer_64(row, row_size);
|
||||||
@ -4165,8 +4069,7 @@ int RAWToJ400(const uint8_t* src_raw,
|
|||||||
// Allocate 1 row of ARGB.
|
// Allocate 1 row of ARGB.
|
||||||
const int row_size = (width * 4 + 31) & ~31;
|
const int row_size = (width * 4 + 31) & ~31;
|
||||||
align_buffer_64(row, row_size);
|
align_buffer_64(row, row_size);
|
||||||
if (!row)
|
if (!row) return 1;
|
||||||
return 1;
|
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
RAWToARGBRow(src_raw, row, width);
|
RAWToARGBRow(src_raw, row, width);
|
||||||
|
|||||||
@ -5098,8 +5098,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y,
|
|||||||
|
|
||||||
// General case fallback creates NV12
|
// General case fallback creates NV12
|
||||||
const uint64_t uv_size = (uint64_t)halfwidth * 2 * halfheight;
|
const uint64_t uv_size = (uint64_t)halfwidth * 2 * halfheight;
|
||||||
if (uv_size > SIZE_MAX)
|
if (uv_size > SIZE_MAX) return 1;
|
||||||
return 1;
|
|
||||||
align_buffer_64(plane_uv, (size_t)uv_size);
|
align_buffer_64(plane_uv, (size_t)uv_size);
|
||||||
if (!plane_uv)
|
if (!plane_uv)
|
||||||
return 1;
|
return 1;
|
||||||
|
|||||||
@ -35,8 +35,8 @@ int ARGBToI444(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
@ -188,8 +188,8 @@ int ARGBToI422(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbI601Constants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
@ -275,34 +275,34 @@ ARGBToUVMatrixRow_C;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
||||||
if (TestCpuFlag(kCpuHasSVE2)) {
|
if (TestCpuFlag(kCpuHasSVE2)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
||||||
if (TestCpuFlag(kCpuHasSME)) {
|
if (TestCpuFlag(kCpuHasSME)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -445,34 +445,34 @@ ARGBToUVMatrixRow_C;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
||||||
if (TestCpuFlag(kCpuHasSVE2)) {
|
if (TestCpuFlag(kCpuHasSVE2)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
||||||
if (TestCpuFlag(kCpuHasSME)) {
|
if (TestCpuFlag(kCpuHasSME)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -579,14 +579,10 @@ ARGBToUVMatrixRow_C;
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ARGBToNV21Matrix(const uint8_t* src_argb,
|
int ARGBToNV21Matrix(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu,
|
||||||
uint8_t* dst_y,
|
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_vu,
|
|
||||||
int dst_stride_uv,
|
int dst_stride_uv,
|
||||||
const struct ArgbConstants* argbconstants,
|
const struct ArgbConstants* argbconstants, int width,
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
int halfwidth = (width + 1) >> 1;
|
int halfwidth = (width + 1) >> 1;
|
||||||
@ -595,7 +591,7 @@ int ARGBToNV21Matrix(const uint8_t* src_argb,
|
|||||||
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c) =
|
const struct ArgbConstants* c) =
|
||||||
ARGBToUVMatrixRow_C;
|
ARGBToUVMatrixRow_C;
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOYMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -660,34 +656,34 @@ ARGBToUVMatrixRow_C;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
||||||
if (TestCpuFlag(kCpuHasSVE2)) {
|
if (TestCpuFlag(kCpuHasSVE2)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
||||||
if (TestCpuFlag(kCpuHasSME)) {
|
if (TestCpuFlag(kCpuHasSME)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
@ -771,8 +767,7 @@ ARGBToUVMatrixRow_C;
|
|||||||
// Allocate a rows of uv.
|
// Allocate a rows of uv.
|
||||||
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
|
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
|
||||||
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
|
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
|
||||||
if (!row_u)
|
if (!row_u) return 1;
|
||||||
return 1;
|
|
||||||
|
|
||||||
for (y = 0; y < height - 1; y += 2) {
|
for (y = 0; y < height - 1; y += 2) {
|
||||||
ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width,
|
ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width,
|
||||||
@ -780,7 +775,7 @@ ARGBToUVMatrixRow_C;
|
|||||||
MergeUVRow(row_u, row_v, dst_vu, halfwidth);
|
MergeUVRow(row_u, row_v, dst_vu, halfwidth);
|
||||||
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
|
ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants);
|
||||||
ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width,
|
ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width,
|
||||||
argbconstants);
|
argbconstants);
|
||||||
src_argb += src_stride_argb * 2;
|
src_argb += src_stride_argb * 2;
|
||||||
dst_y += dst_stride_y * 2;
|
dst_y += dst_stride_y * 2;
|
||||||
dst_vu += dst_stride_uv;
|
dst_vu += dst_stride_uv;
|
||||||
@ -794,12 +789,9 @@ ARGBToUVMatrixRow_C;
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToI400Matrix(const uint8_t* src_argb,
|
int ARGBToI400Matrix(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_y, int dst_stride_y,
|
||||||
uint8_t* dst_y,
|
const struct ArgbConstants* constants, int width,
|
||||||
int dst_stride_y,
|
|
||||||
const struct ArgbConstants* constants,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
||||||
@ -854,17 +846,15 @@ int ARGBToI400Matrix(const uint8_t* src_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToYUY2Matrix(const uint8_t* src_argb,
|
int ARGBToYUY2Matrix(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_yuy2, int dst_stride_yuy2,
|
||||||
uint8_t* dst_yuy2,
|
const struct ArgbConstants* constants, int width,
|
||||||
int dst_stride_yuy2,
|
|
||||||
const struct ArgbConstants* constants,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c) = ARGBToUVMatrixRow_C;
|
const struct ArgbConstants* c) =
|
||||||
|
ARGBToUVMatrixRow_C;
|
||||||
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
||||||
const struct ArgbConstants* c) = ARGBToYMatrixRow_C;
|
const struct ArgbConstants* c) = ARGBToYMatrixRow_C;
|
||||||
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
|
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
|
||||||
@ -966,17 +956,15 @@ int ARGBToYUY2Matrix(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToUYVYMatrix(const uint8_t* src_argb,
|
int ARGBToUYVYMatrix(const uint8_t* src_argb, int src_stride_argb,
|
||||||
int src_stride_argb,
|
uint8_t* dst_uyvy, int dst_stride_uyvy,
|
||||||
uint8_t* dst_uyvy,
|
const struct ArgbConstants* constants, int width,
|
||||||
int dst_stride_uyvy,
|
|
||||||
const struct ArgbConstants* constants,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
const struct ArgbConstants* c) = ARGBToUVMatrixRow_C;
|
const struct ArgbConstants* c) =
|
||||||
|
ARGBToUVMatrixRow_C;
|
||||||
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width,
|
||||||
const struct ArgbConstants* c) = ARGBToYMatrixRow_C;
|
const struct ArgbConstants* c) = ARGBToYMatrixRow_C;
|
||||||
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
|
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
|
||||||
@ -1077,69 +1065,47 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Same as NV12 but U and V swapped.
|
// Same as NV12 but U and V swapped.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToNV21(const uint8_t* src_argb,
|
int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y,
|
||||||
int src_stride_argb,
|
int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_vu,
|
|
||||||
int dst_stride_vu,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToNV21Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_vu,
|
return ARGBToNV21Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y,
|
||||||
dst_stride_vu, &kArgbI601Constants, width, height);
|
dst_vu, dst_stride_vu, &kArgbI601Constants, width,
|
||||||
|
height);
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToNV12(const uint8_t* src_abgr,
|
int ABGRToNV12(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int dst_stride_uv,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToNV12Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_uv,
|
return ARGBToNV12Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y,
|
||||||
dst_stride_uv, &kAbgrI601Constants, width, height);
|
dst_uv, dst_stride_uv, &kAbgrI601Constants, width,
|
||||||
|
height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same as NV12 but U and V swapped.
|
// Same as NV12 but U and V swapped.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToNV21(const uint8_t* src_abgr,
|
int ABGRToNV21(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_vu,
|
|
||||||
int dst_stride_vu,
|
|
||||||
int width,
|
|
||||||
int height) {
|
int height) {
|
||||||
return ARGBToNV21Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_vu,
|
return ARGBToNV21Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y,
|
||||||
dst_stride_vu, &kAbgrI601Constants, width, height);
|
dst_vu, dst_stride_vu, &kAbgrI601Constants, width,
|
||||||
|
height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ARGB to YUY2.
|
// Convert ARGB to YUY2.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToYUY2(const uint8_t* src_argb,
|
int ARGBToYUY2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yuy2,
|
||||||
int src_stride_argb,
|
int dst_stride_yuy2, int width, int height) {
|
||||||
uint8_t* dst_yuy2,
|
|
||||||
int dst_stride_yuy2,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToYUY2Matrix(src_argb, src_stride_argb, dst_yuy2, dst_stride_yuy2,
|
return ARGBToYUY2Matrix(src_argb, src_stride_argb, dst_yuy2, dst_stride_yuy2,
|
||||||
&kArgbI601Constants, width, height);
|
&kArgbI601Constants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ARGB to UYVY.
|
// Convert ARGB to UYVY.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToUYVY(const uint8_t* src_argb,
|
int ARGBToUYVY(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_uyvy,
|
||||||
int src_stride_argb,
|
int dst_stride_uyvy, int width, int height) {
|
||||||
uint8_t* dst_uyvy,
|
|
||||||
int dst_stride_uyvy,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToUYVYMatrix(src_argb, src_stride_argb, dst_uyvy, dst_stride_uyvy,
|
return ARGBToUYVYMatrix(src_argb, src_stride_argb, dst_uyvy, dst_stride_uyvy,
|
||||||
&kArgbI601Constants, width, height);
|
&kArgbI601Constants, width, height);
|
||||||
}
|
}
|
||||||
@ -1808,63 +1774,38 @@ int ARGBToAR30(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// ARGB little endian (bgra in memory) to J444
|
// ARGB little endian (bgra in memory) to J444
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToJ444(const uint8_t* src_argb,
|
int ARGBToJ444(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y,
|
||||||
int src_stride_argb,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbJPEGConstants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ARGB to J420. (JPeg full range I420).
|
// Convert ARGB to J420. (JPeg full range I420).
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToJ420(const uint8_t* src_argb,
|
int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y,
|
||||||
int src_stride_argb,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbJPEGConstants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ARGB to J422. (JPeg full range I422).
|
// Convert ARGB to J422. (JPeg full range I422).
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToJ422(const uint8_t* src_argb,
|
int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y,
|
||||||
int src_stride_argb,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kArgbJPEGConstants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kArgbJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ARGB to J400.
|
// Convert ARGB to J400.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ARGBToJ400(const uint8_t* src_argb,
|
int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y,
|
||||||
int src_stride_argb,
|
int dst_stride_y, int width, int height) {
|
||||||
uint8_t* dst_y,
|
|
||||||
int dst_stride_y,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y,
|
return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y,
|
||||||
&kArgbJPEGConstants, width, height);
|
&kArgbJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
@ -1967,36 +1908,22 @@ int RGBAToJ400(const uint8_t* src_rgba,
|
|||||||
|
|
||||||
// Convert ABGR to J420. (JPeg full range I420).
|
// Convert ABGR to J420. (JPeg full range I420).
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToJ420(const uint8_t* src_abgr,
|
int ABGRToJ420(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kAbgrJPEGConstants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kAbgrJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ABGR to J422. (JPeg full range I422).
|
// Convert ABGR to J422. (JPeg full range I422).
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int ABGRToJ422(const uint8_t* src_abgr,
|
int ABGRToJ422(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y,
|
||||||
int src_stride_abgr,
|
int dst_stride_y, uint8_t* dst_u, int dst_stride_u,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_v, int dst_stride_v, int width, int height) {
|
||||||
int dst_stride_y,
|
|
||||||
uint8_t* dst_u,
|
|
||||||
int dst_stride_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int dst_stride_v,
|
|
||||||
int width,
|
|
||||||
int height) {
|
|
||||||
return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u,
|
||||||
dst_stride_u, dst_v, dst_stride_v, &kAbgrJPEGConstants,
|
dst_stride_u, dst_v, dst_stride_v,
|
||||||
width, height);
|
&kAbgrJPEGConstants, width, height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert ABGR to J400.
|
// Convert ABGR to J400.
|
||||||
@ -2298,34 +2225,34 @@ int RAWToNV21Matrix(const uint8_t* src_raw,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 16)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SVE2)
|
||||||
if (TestCpuFlag(kCpuHasSVE2)) {
|
if (TestCpuFlag(kCpuHasSVE2)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SME)
|
||||||
if (TestCpuFlag(kCpuHasSME)) {
|
if (TestCpuFlag(kCpuHasSME)) {
|
||||||
if (IS_ALIGNED(width, 2)) {
|
if (IS_ALIGNED(width, 2)) {
|
||||||
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3)
|
||||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||||
|
|||||||
@ -8,13 +8,13 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libyuv/convert_from_argb.h" // For ArgbConstants
|
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <string.h> // for memset()
|
#include <string.h> // for memset()
|
||||||
|
|
||||||
|
#include "libyuv/convert_from_argb.h" // For ArgbConstants
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/row.h"
|
#include "libyuv/row.h"
|
||||||
#include "libyuv/scale_row.h" // for ScaleRowDown2
|
#include "libyuv/scale_row.h" // for ScaleRowDown2
|
||||||
@ -630,6 +630,14 @@ void SplitUVPlane(const uint8_t* src_uv,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_SPLITUVROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
SplitUVRow = SplitUVRow_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
SplitUVRow = SplitUVRow_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_SPLITUVROW_NEON)
|
#if defined(HAS_SPLITUVROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
SplitUVRow = SplitUVRow_Any_NEON;
|
SplitUVRow = SplitUVRow_Any_NEON;
|
||||||
@ -2588,6 +2596,14 @@ void MirrorPlane(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_MIRRORROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
|
MirrorRow = MirrorRow_Any_AVX512BW;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
MirrorRow = MirrorRow_AVX512BW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_MIRRORROW_LSX)
|
#if defined(HAS_MIRRORROW_LSX)
|
||||||
if (TestCpuFlag(kCpuHasLSX)) {
|
if (TestCpuFlag(kCpuHasLSX)) {
|
||||||
MirrorRow = MirrorRow_Any_LSX;
|
MirrorRow = MirrorRow_Any_LSX;
|
||||||
|
|||||||
@ -8,11 +8,11 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "libyuv/rotate.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
|
||||||
#include "libyuv/rotate.h"
|
|
||||||
|
|
||||||
#include "libyuv/convert.h"
|
#include "libyuv/convert.h"
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
@ -403,6 +403,11 @@ void SplitRotateUV180(const uint8_t* src,
|
|||||||
MirrorSplitUVRow = MirrorSplitUVRow_AVX2;
|
MirrorSplitUVRow = MirrorSplitUVRow_AVX2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_MIRRORSPLITUVROW_AVX512BW)
|
||||||
|
if (TestCpuFlag(kCpuHasAVX512BW) && IS_ALIGNED(width, 32)) {
|
||||||
|
MirrorSplitUVRow = MirrorSplitUVRow_AVX512BW;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_MIRRORSPLITUVROW_LSX)
|
#if defined(HAS_MIRRORSPLITUVROW_LSX)
|
||||||
if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 32)) {
|
if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 32)) {
|
||||||
MirrorSplitUVRow = MirrorSplitUVRow_LSX;
|
MirrorSplitUVRow = MirrorSplitUVRow_LSX;
|
||||||
|
|||||||
@ -1500,12 +1500,8 @@ ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
|
|||||||
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
|
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
|
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
|
||||||
ANY11P(ARGBShuffleRow_Any_AVX512BW,
|
ANY11P(ARGBShuffleRow_Any_AVX512BW, ARGBShuffleRow_AVX512BW, const uint8_t*, 4,
|
||||||
ARGBShuffleRow_AVX512BW,
|
4, 31)
|
||||||
const uint8_t*,
|
|
||||||
4,
|
|
||||||
4,
|
|
||||||
31)
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_ARGBSHUFFLEROW_NEON
|
#ifdef HAS_ARGBSHUFFLEROW_NEON
|
||||||
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
|
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
|
||||||
@ -1849,13 +1845,8 @@ ANY11I(InterpolateRow_16_Any_NEON,
|
|||||||
7)
|
7)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
||||||
ANY11I(InterpolateRow_16_Any_AVX2,
|
ANY11I(InterpolateRow_16_Any_AVX2, InterpolateRow_16_AVX2, uint16_t, uint16_t,
|
||||||
InterpolateRow_16_AVX2,
|
1, 1, 15)
|
||||||
uint16_t,
|
|
||||||
uint16_t,
|
|
||||||
1,
|
|
||||||
1,
|
|
||||||
15)
|
|
||||||
#endif
|
#endif
|
||||||
#undef ANY11I
|
#undef ANY11I
|
||||||
|
|
||||||
@ -1919,6 +1910,9 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
|
|||||||
memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
|
memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAS_MIRRORROW_AVX512BW
|
||||||
|
ANY11M(MirrorRow_Any_AVX512BW, MirrorRow_AVX512BW, 1, 63)
|
||||||
|
#endif
|
||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
||||||
#endif
|
#endif
|
||||||
@ -2022,6 +2016,9 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
|
|||||||
#ifdef HAS_SPLITUVROW_SSE2
|
#ifdef HAS_SPLITUVROW_SSE2
|
||||||
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
|
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_SPLITUVROW_AVX512BW
|
||||||
|
ANY12(SplitUVRow_Any_AVX512BW, SplitUVRow_AVX512BW, 0, 2, 0, 63)
|
||||||
|
#endif
|
||||||
#ifdef HAS_SPLITUVROW_AVX2
|
#ifdef HAS_SPLITUVROW_AVX2
|
||||||
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
|
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
|
||||||
#endif
|
#endif
|
||||||
@ -2291,6 +2288,9 @@ ANY12MS(ARGB4444ToUVMatrixRow_Any_AVX2, ARGB4444ToUVMatrixRow_AVX2, 0, 2, 31)
|
|||||||
#ifdef HAS_ARGBTOUVMATRIXROW_AVX512BW
|
#ifdef HAS_ARGBTOUVMATRIXROW_AVX512BW
|
||||||
ANY12MS(ARGBToUVMatrixRow_Any_AVX512BW, ARGBToUVMatrixRow_AVX512BW, 0, 4, 63)
|
ANY12MS(ARGBToUVMatrixRow_Any_AVX512BW, ARGBToUVMatrixRow_AVX512BW, 0, 4, 63)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_RGBTOUVMATRIXROW_AVX512BW
|
||||||
|
ANY12MS(RGBToUVMatrixRow_Any_AVX512BW, RGBToUVMatrixRow_AVX512BW, 0, 3, 63)
|
||||||
|
#endif
|
||||||
#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3
|
#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3
|
||||||
ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7)
|
ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -749,31 +749,25 @@ MAKEROWYJ(ABGR, 0, 1, 2, 4)
|
|||||||
MAKEROWYJ(RGBA, 3, 2, 1, 4)
|
MAKEROWYJ(RGBA, 3, 2, 1, 4)
|
||||||
#undef MAKEROWYJ
|
#undef MAKEROWYJ
|
||||||
|
|
||||||
static __inline uint8_t RGBToYMatrix(uint8_t b0,
|
static __inline uint8_t RGBToYMatrix(uint8_t b0, uint8_t b1, uint8_t b2,
|
||||||
uint8_t b1,
|
|
||||||
uint8_t b2,
|
|
||||||
uint8_t b3,
|
uint8_t b3,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
return (c->kRGBToY[0] * b0 + c->kRGBToY[1] * b1 + c->kRGBToY[2] * b2 +
|
return (c->kRGBToY[0] * b0 + c->kRGBToY[1] * b1 + c->kRGBToY[2] * b2 +
|
||||||
c->kRGBToY[3] * b3 + c->kAddY[0]) >>
|
c->kRGBToY[3] * b3 + c->kAddY[0]) >>
|
||||||
8;
|
8;
|
||||||
}
|
}
|
||||||
static __inline uint8_t RGBToUMatrix(uint8_t b0,
|
static __inline uint8_t RGBToUMatrix(uint8_t b0, uint8_t b1, uint8_t b2,
|
||||||
uint8_t b1,
|
|
||||||
uint8_t b2,
|
|
||||||
uint8_t b3,
|
uint8_t b3,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
return (c->kAddUV[0] - (c->kRGBToU[0] * b0 + c->kRGBToU[1] * b1 +
|
return (c->kAddUV[0] - (c->kRGBToU[0] * b0 + c->kRGBToU[1] * b1 +
|
||||||
c->kRGBToU[2] * b2 + c->kRGBToU[3] * b3)) >>
|
c->kRGBToU[2] * b2 + c->kRGBToU[3] * b3)) >>
|
||||||
8;
|
8;
|
||||||
}
|
}
|
||||||
static __inline uint8_t RGBToVMatrix(uint8_t b0,
|
static __inline uint8_t RGBToVMatrix(uint8_t b0, uint8_t b1, uint8_t b2,
|
||||||
uint8_t b1,
|
|
||||||
uint8_t b2,
|
|
||||||
uint8_t b3,
|
uint8_t b3,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
return (c->kAddUV[0] - (c->kRGBToV[0] * b0 + c->kRGBToV[1] * b1 +
|
return (c->kAddUV[0] - (c->kRGBToV[0] * b0 + c->kRGBToV[1] * b1 +
|
||||||
c->kRGBToV[2] * b2 + c->kRGBToV[3] * b3)) >>
|
c->kRGBToV[2] * b2 + c->kRGBToV[3] * b3)) >>
|
||||||
8;
|
8;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -783,7 +777,8 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb,
|
|||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width; ++x) {
|
for (x = 0; x < width; ++x) {
|
||||||
dst_y[0] = RGBToYMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c);
|
dst_y[0] =
|
||||||
|
RGBToYMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c);
|
||||||
src_argb += 4;
|
src_argb += 4;
|
||||||
dst_y += 1;
|
dst_y += 1;
|
||||||
}
|
}
|
||||||
@ -1513,18 +1508,18 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
|||||||
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
|
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
|
||||||
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
|
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
|
||||||
|
|
||||||
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
|
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
|
||||||
extern const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
|
extern const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
|
||||||
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
|
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
|
||||||
-(RV), 0, AY, AUV); \
|
-(RV), 0, AY, AUV); \
|
||||||
extern const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
|
extern const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
|
||||||
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
|
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
|
||||||
-(BV), 0, AY, AUV); \
|
-(BV), 0, AY, AUV); \
|
||||||
extern const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
|
extern const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
|
||||||
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
|
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
|
||||||
-(GV), -(RV), AY, AUV); \
|
-(GV), -(RV), AY, AUV); \
|
||||||
extern const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
|
extern const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
|
||||||
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
|
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
|
||||||
-(GV), -(BV), AY, AUV);
|
-(GV), -(BV), AY, AUV);
|
||||||
|
|
||||||
// BT.601 limited range RGB to YUV coefficients
|
// BT.601 limited range RGB to YUV coefficients
|
||||||
@ -4556,9 +4551,7 @@ void HalfMergeUVRow_C(const uint8_t* src_u,
|
|||||||
|
|
||||||
#undef STATIC_CAST
|
#undef STATIC_CAST
|
||||||
|
|
||||||
void RGBToYMatrixRow_C(const uint8_t* src_rgb,
|
void RGBToYMatrixRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4571,11 +4564,8 @@ void RGBToYMatrixRow_C(const uint8_t* src_rgb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBToUVMatrixRow_C(const uint8_t* src_rgb,
|
void RGBToUVMatrixRow_C(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
int src_stride_rgb,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4591,9 +4581,7 @@ void RGBToUVMatrixRow_C(const uint8_t* src_rgb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
|
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
|
||||||
void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb,
|
void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4608,18 +4596,14 @@ void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
|
||||||
void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb,
|
void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
int src_stride_rgb,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
RGB24ToARGBRow_AVX2(src_rgb, row, twidth);
|
RGB24ToARGBRow_AVX2(src_rgb, row, twidth);
|
||||||
RGB24ToARGBRow_AVX2(src_rgb + src_stride_rgb,
|
RGB24ToARGBRow_AVX2(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, twidth);
|
||||||
row + MAXTWIDTH * 4, twidth);
|
|
||||||
ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_rgb += twidth * 3;
|
src_rgb += twidth * 3;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4629,12 +4613,29 @@ void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) && \
|
||||||
|
defined(HAS_RGB24TOARGBROW_AVX512BW)
|
||||||
|
void RGBToUVMatrixRow_AVX512BW(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
|
const struct ArgbConstants* c) {
|
||||||
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
|
while (width > 0) {
|
||||||
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
|
RGB24ToARGBRow_AVX512BW(src_rgb, row, twidth);
|
||||||
|
RGB24ToARGBRow_AVX512BW(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4,
|
||||||
|
twidth);
|
||||||
|
ARGBToUVMatrixRow_AVX512BW(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
|
src_rgb += twidth * 3;
|
||||||
|
dst_u += twidth / 2;
|
||||||
|
dst_v += twidth / 2;
|
||||||
|
width -= twidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_RGB24TOARGBROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_RGB24TOARGBROW_NEON)
|
||||||
void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb,
|
void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb, int src_stride_rgb,
|
||||||
int src_stride_rgb,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4650,9 +4651,7 @@ void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565,
|
void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width,
|
||||||
uint8_t* dst_y,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4665,17 +4664,15 @@ void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565,
|
void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
int src_stride_rgb565,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
RGB565ToARGBRow_C(src_rgb565, row, twidth);
|
RGB565ToARGBRow_C(src_rgb565, row, twidth);
|
||||||
RGB565ToARGBRow_C(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4, twidth);
|
RGB565ToARGBRow_C(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4,
|
||||||
|
twidth);
|
||||||
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_rgb565 += twidth * 2;
|
src_rgb565 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4685,10 +4682,8 @@ void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
|
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
|
||||||
void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565,
|
void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4702,18 +4697,15 @@ void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
|
||||||
void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565,
|
void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
int src_stride_rgb565,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
RGB565ToARGBRow_AVX2(src_rgb565, row, twidth);
|
RGB565ToARGBRow_AVX2(src_rgb565, row, twidth);
|
||||||
RGB565ToARGBRow_AVX2(src_rgb565 + src_stride_rgb565,
|
RGB565ToARGBRow_AVX2(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4,
|
||||||
row + MAXTWIDTH * 4, twidth);
|
twidth);
|
||||||
ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_rgb565 += twidth * 2;
|
src_rgb565 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4724,10 +4716,8 @@ void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOYMATRIXROW_NEON)
|
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOYMATRIXROW_NEON)
|
||||||
void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565,
|
void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4741,17 +4731,15 @@ void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOUVMATRIXROW_NEON)
|
||||||
void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565,
|
void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||||
int src_stride_rgb565,
|
uint8_t* dst_u, uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
RGB565ToARGBRow_NEON(src_rgb565, row, twidth);
|
RGB565ToARGBRow_NEON(src_rgb565, row, twidth);
|
||||||
RGB565ToARGBRow_NEON(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4, twidth);
|
RGB565ToARGBRow_NEON(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4,
|
||||||
|
twidth);
|
||||||
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_rgb565 += twidth * 2;
|
src_rgb565 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4761,10 +4749,8 @@ void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555,
|
void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4777,16 +4763,15 @@ void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555,
|
void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555,
|
||||||
int src_stride_argb1555,
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
ARGB1555ToARGBRow_C(src_argb1555, row, twidth);
|
ARGB1555ToARGBRow_C(src_argb1555, row, twidth);
|
||||||
ARGB1555ToARGBRow_C(src_argb1555 + src_stride_argb1555, row + MAXTWIDTH * 4, twidth);
|
ARGB1555ToARGBRow_C(src_argb1555 + src_stride_argb1555, row + MAXTWIDTH * 4,
|
||||||
|
twidth);
|
||||||
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_argb1555 += twidth * 2;
|
src_argb1555 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4795,10 +4780,8 @@ void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444,
|
void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4811,16 +4794,15 @@ void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444,
|
void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444,
|
||||||
int src_stride_argb4444,
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
ARGB4444ToARGBRow_C(src_argb4444, row, twidth);
|
ARGB4444ToARGBRow_C(src_argb4444, row, twidth);
|
||||||
ARGB4444ToARGBRow_C(src_argb4444 + src_stride_argb4444, row + MAXTWIDTH * 4, twidth);
|
ARGB4444ToARGBRow_C(src_argb4444 + src_stride_argb4444, row + MAXTWIDTH * 4,
|
||||||
|
twidth);
|
||||||
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_argb4444 += twidth * 2;
|
src_argb4444 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4831,10 +4813,8 @@ void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444,
|
|||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_AVX2)
|
#if defined(HAS_ARGBTOYMATRIXROW_AVX2)
|
||||||
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
|
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
|
||||||
void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555,
|
void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4848,10 +4828,8 @@ void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
||||||
void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444,
|
void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4868,10 +4846,8 @@ void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444,
|
|||||||
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
|
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
|
||||||
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
|
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
|
||||||
void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
|
void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
|
||||||
int src_stride_argb1555,
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4890,10 +4866,8 @@ void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
|
|||||||
|
|
||||||
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
|
||||||
void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
|
void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
|
||||||
int src_stride_argb4444,
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
@ -4912,10 +4886,8 @@ void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
|
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
|
||||||
void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555,
|
void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4929,10 +4901,8 @@ void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
|
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
|
||||||
void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444,
|
void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
uint8_t* dst_y,
|
int width, const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
@ -4947,16 +4917,15 @@ void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444,
|
|||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
|
||||||
void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555,
|
void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555,
|
||||||
int src_stride_argb1555,
|
int src_stride_argb1555, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
ARGB1555ToARGBRow_NEON(src_argb1555, row, twidth);
|
ARGB1555ToARGBRow_NEON(src_argb1555, row, twidth);
|
||||||
ARGB1555ToARGBRow_NEON(src_argb1555 + src_stride_argb1555, row + MAXTWIDTH * 4, twidth);
|
ARGB1555ToARGBRow_NEON(src_argb1555 + src_stride_argb1555,
|
||||||
|
row + MAXTWIDTH * 4, twidth);
|
||||||
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_argb1555 += twidth * 2;
|
src_argb1555 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
@ -4968,16 +4937,15 @@ void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555,
|
|||||||
|
|
||||||
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
|
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
|
||||||
void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444,
|
void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444,
|
||||||
int src_stride_argb4444,
|
int src_stride_argb4444, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||||
ARGB4444ToARGBRow_NEON(src_argb4444, row, twidth);
|
ARGB4444ToARGBRow_NEON(src_argb4444, row, twidth);
|
||||||
ARGB4444ToARGBRow_NEON(src_argb4444 + src_stride_argb4444, row + MAXTWIDTH * 4, twidth);
|
ARGB4444ToARGBRow_NEON(src_argb4444 + src_stride_argb4444,
|
||||||
|
row + MAXTWIDTH * 4, twidth);
|
||||||
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
|
||||||
src_argb4444 += twidth * 2;
|
src_argb4444 += twidth * 2;
|
||||||
dst_u += twidth / 2;
|
dst_u += twidth / 2;
|
||||||
|
|||||||
@ -120,11 +120,11 @@ static const lvec8 kShuffleNV21 = {
|
|||||||
|
|
||||||
#if defined(HAS_J400TOARGBROW_AVX2) || defined(HAS_J400TOARGBROW_AVX512BW)
|
#if defined(HAS_J400TOARGBROW_AVX2) || defined(HAS_J400TOARGBROW_AVX512BW)
|
||||||
alignas(64) static const uint8_t kShuffleMaskJ400ToARGB[64] = {
|
alignas(64) static const uint8_t kShuffleMaskJ400ToARGB[64] = {
|
||||||
0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u, 3u, 128u,
|
0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u,
|
||||||
4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u, 7u, 7u, 7u, 128u,
|
3u, 128u, 4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u,
|
||||||
8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u, 10u, 128u, 11u, 11u, 11u, 128u,
|
7u, 7u, 7u, 128u, 8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u,
|
||||||
12u, 12u, 12u, 128u, 13u, 13u, 13u, 128u, 14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u
|
10u, 128u, 11u, 11u, 11u, 128u, 12u, 12u, 12u, 128u, 13u, 13u, 13u, 128u,
|
||||||
};
|
14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_J400TOARGBROW_AVX2
|
#ifdef HAS_J400TOARGBROW_AVX2
|
||||||
@ -149,16 +149,17 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
|||||||
"sub $0x10,%2 \n"
|
"sub $0x10,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_y), // %0
|
: "+r"(src_y), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(kShuffleMaskJ400ToARGB) // %3
|
: "r"(kShuffleMaskJ400ToARGB) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7");
|
||||||
}
|
}
|
||||||
#endif // HAS_J400TOARGBROW_AVX2
|
#endif // HAS_J400TOARGBROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_J400TOARGBROW_AVX512BW
|
#ifdef HAS_J400TOARGBROW_AVX512BW
|
||||||
void J400ToARGBRow_AVX512BW(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
void J400ToARGBRow_AVX512BW(const uint8_t* src_y, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vpternlogd $0xff,%%zmm7,%%zmm7,%%zmm7 \n" // 0xffffffff
|
"vpternlogd $0xff,%%zmm7,%%zmm7,%%zmm7 \n" // 0xffffffff
|
||||||
"vpslld $0x18,%%zmm7,%%zmm7 \n" // 0xff000000
|
"vpslld $0x18,%%zmm7,%%zmm7 \n" // 0xff000000
|
||||||
@ -179,10 +180,10 @@ void J400ToARGBRow_AVX512BW(const uint8_t* src_y, uint8_t* dst_argb, int width)
|
|||||||
"sub $0x20,%2 \n"
|
"sub $0x20,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_y), // %0
|
: "+r"(src_y), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleMaskJ400ToARGB) // %3
|
: "m"(kShuffleMaskJ400ToARGB) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm5", "xmm7");
|
: "memory", "cc", "xmm0", "xmm1", "xmm5", "xmm7");
|
||||||
}
|
}
|
||||||
#endif // HAS_J400TOARGBROW_AVX512BW
|
#endif // HAS_J400TOARGBROW_AVX512BW
|
||||||
@ -221,15 +222,16 @@ void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
|
|||||||
"lea 0x40(%1),%1 \n"
|
"lea 0x40(%1),%1 \n"
|
||||||
"sub $0x10,%2 \n"
|
"sub $0x10,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_rgb24), // %0
|
: "+r"(src_rgb24), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleMaskRGB24ToARGB[0]) // %3
|
: "m"(kShuffleMaskRGB24ToARGB[0]) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_RGB24TOARGBROW_AVX2
|
#ifdef HAS_RGB24TOARGBROW_AVX2
|
||||||
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
|
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
// Reference to prevent discarding of kShuffleMaskRGB24ToARGB[1] which is
|
// Reference to prevent discarding of kShuffleMaskRGB24ToARGB[1] which is
|
||||||
// accessed via offset in assembly.
|
// accessed via offset in assembly.
|
||||||
const uvec8* dummy = &kShuffleMaskRGB24ToARGB[1];
|
const uvec8* dummy = &kShuffleMaskRGB24ToARGB[1];
|
||||||
@ -267,9 +269,9 @@ void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width)
|
|||||||
"sub $0x20,%2 \n"
|
"sub $0x20,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_rgb24), // %0
|
: "+r"(src_rgb24), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleMaskRGB24ToARGB[0]) // %3
|
: "m"(kShuffleMaskRGB24ToARGB[0]) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
}
|
}
|
||||||
@ -399,7 +401,8 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const uint
|
|||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kPermdRAWToARGB_AVX512BW), // %3
|
: "m"(kPermdRAWToARGB_AVX512BW), // %3
|
||||||
"m"(*shuffler) // %4
|
"m"(*shuffler) // %4
|
||||||
: "memory", "cc", "rax", "k1", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "rax", "k1", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
|
||||||
|
"xmm5", "xmm6");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
||||||
@ -1511,15 +1514,14 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb,
|
|||||||
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
|
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
|
||||||
"vpsllw $15,%%zmm16,%%zmm5 \n"
|
"vpsllw $15,%%zmm16,%%zmm5 \n"
|
||||||
"vpacksswb %%zmm5,%%zmm5,%%zmm5 \n"
|
"vpacksswb %%zmm5,%%zmm5,%%zmm5 \n"
|
||||||
"vpsrlw $15,%%zmm16,%%zmm16 \n" // zmm16 = 1
|
"vpsrlw $15,%%zmm16,%%zmm16 \n" // zmm16 = 1
|
||||||
"vbroadcasti64x4 0(%3),%%zmm4 \n"
|
"vbroadcasti64x4 0(%3),%%zmm4 \n"
|
||||||
"vbroadcasti64x4 0x60(%3),%%zmm7 \n"
|
"vbroadcasti64x4 0x60(%3),%%zmm7 \n"
|
||||||
"vpmaddubsw %%zmm5,%%zmm4,%%zmm6 \n"
|
"vpmaddubsw %%zmm5,%%zmm4,%%zmm6 \n"
|
||||||
"vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n"
|
"vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n"
|
||||||
"vpackssdw %%zmm6,%%zmm6,%%zmm6 \n"
|
"vpackssdw %%zmm6,%%zmm6,%%zmm6 \n"
|
||||||
"vpsubw %%zmm6,%%zmm7,%%zmm7 \n"
|
"vpsubw %%zmm6,%%zmm7,%%zmm7 \n"
|
||||||
"vmovups %4,%%zmm6 \n"
|
"vmovups %4,%%zmm6 \n" LABELALIGN
|
||||||
LABELALIGN
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovups (%0),%%zmm0 \n"
|
"vmovups (%0),%%zmm0 \n"
|
||||||
"vmovups 0x40(%0),%%zmm1 \n"
|
"vmovups 0x40(%0),%%zmm1 \n"
|
||||||
@ -1551,11 +1553,11 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb,
|
|||||||
"sub $0x40,%2 \n"
|
"sub $0x40,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(c), // %3
|
: "r"(c), // %3
|
||||||
"m"(kPermdARGBToY_AVX512BW) // %4
|
"m"(kPermdARGBToY_AVX512BW) // %4
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm7", "xmm16");
|
"xmm7", "xmm16");
|
||||||
}
|
}
|
||||||
@ -1713,8 +1715,8 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcasti64x4 0x20(%4),%%zmm3 \n" // kRGBToU
|
"vbroadcasti64x4 0x20(%4),%%zmm3 \n" // kRGBToU
|
||||||
"vbroadcasti64x4 0x40(%4),%%zmm4 \n" // kRGBToV
|
"vbroadcasti64x4 0x40(%4),%%zmm4 \n" // kRGBToV
|
||||||
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" // -1
|
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" // -1
|
||||||
"vpsllw $15,%%zmm16,%%zmm5 \n" // 0x8000
|
"vpsllw $15,%%zmm16,%%zmm5 \n" // 0x8000
|
||||||
"vmovups %5,%%zmm7 \n"
|
"vmovups %5,%%zmm7 \n"
|
||||||
"sub %1,%2 \n"
|
"sub %1,%2 \n"
|
||||||
|
|
||||||
@ -2174,8 +2176,8 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
|
|||||||
"vbroadcasti64x4 0x20(%5),%%zmm4 \n" // RGBToU
|
"vbroadcasti64x4 0x20(%5),%%zmm4 \n" // RGBToU
|
||||||
"vbroadcasti64x4 0x40(%5),%%zmm5 \n" // RGBToV
|
"vbroadcasti64x4 0x40(%5),%%zmm5 \n" // RGBToV
|
||||||
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
|
"vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n"
|
||||||
"vpabsb %%zmm16,%%zmm6 \n" // 0x0101
|
"vpabsb %%zmm16,%%zmm6 \n" // 0x0101
|
||||||
"vpsllw $15,%%zmm16,%%zmm17 \n" // 0x8000
|
"vpsllw $15,%%zmm16,%%zmm17 \n" // 0x8000
|
||||||
"vbroadcasti64x4 %6,%%zmm7 \n" // kShuffleAARRGGBB
|
"vbroadcasti64x4 %6,%%zmm7 \n" // kShuffleAARRGGBB
|
||||||
"vmovups %7,%%zmm18 \n" // kPermdARGBToY_AVX512BW
|
"vmovups %7,%%zmm18 \n" // kPermdARGBToY_AVX512BW
|
||||||
"vmovups %8,%%zmm19 \n" // kPermdARGBToUV_AVX512BW
|
"vmovups %8,%%zmm19 \n" // kPermdARGBToUV_AVX512BW
|
||||||
@ -2209,7 +2211,8 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb,
|
|||||||
"vpmaddubsw %%zmm5,%%zmm0,%%zmm0 \n" // 16 V
|
"vpmaddubsw %%zmm5,%%zmm0,%%zmm0 \n" // 16 V
|
||||||
"vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n"
|
"vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n"
|
||||||
"vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n"
|
"vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n"
|
||||||
"vpackssdw %%zmm0,%%zmm1,%%zmm0 \n" // mutates (U in lower, V in upper)
|
"vpackssdw %%zmm0,%%zmm1,%%zmm0 \n" // mutates (U in lower, V
|
||||||
|
// in upper)
|
||||||
"vpaddw %%zmm17,%%zmm0,%%zmm0 \n"
|
"vpaddw %%zmm17,%%zmm0,%%zmm0 \n"
|
||||||
"vpsrlw $0x8,%%zmm0,%%zmm0 \n"
|
"vpsrlw $0x8,%%zmm0,%%zmm0 \n"
|
||||||
"vpackuswb %%zmm0,%%zmm0,%%zmm0 \n" // mutates
|
"vpackuswb %%zmm0,%%zmm0,%%zmm0 \n" // mutates
|
||||||
@ -4601,6 +4604,29 @@ void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_MIRRORROW_SSSE3
|
#endif // HAS_MIRRORROW_SSSE3
|
||||||
|
|
||||||
|
#ifdef HAS_MIRRORROW_AVX512BW
|
||||||
|
// Writes the bytes of a row to dst in reverse order, 64 bytes per loop
// iteration. Each iteration loads the 64 bytes that end at src + remaining
// count, shuffles the bytes inside every 128-bit lane with the broadcast
// kShuffleMirror table, reverses the order of the four 128-bit lanes, and
// stores the result at dst.
// NOTE(review): kShuffleMirror is defined outside this view; presumably a
// 16-byte byte-reversal table - confirm.
// NOTE(review): the loop subtracts 64 and repeats while the count is positive,
// so width is presumably expected to be a multiple of 64 - confirm against the
// caller.
void MirrorRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
|
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
||||||
|
asm volatile("vbroadcasti32x4 %3,%%zmm5 \n" // 16-byte table to all 4 lanes
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu8 -0x40(%0,%2,1),%%zmm0 \n" // 64 bytes ending at src + count
|
||||||
|
"vpshufb %%zmm5,%%zmm0,%%zmm0 \n" // byte shuffle within each lane
|
||||||
|
"vshufi64x2 $0x1b,%%zmm0,%%zmm0,%%zmm0 \n" // lanes 3,2,1,0 -> 0,1,2,3
|
||||||
|
"vmovdqu8 %%zmm0,(%1) \n"
|
||||||
|
"lea 0x40(%1),%1 \n" // dst += 64
|
||||||
|
"sub $0x40,%2 \n" // count -= 64
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src), // %0
|
||||||
|
"+r"(dst), // %1
|
||||||
|
"+r"(temp_width) // %2
|
||||||
|
: "m"(kShuffleMirror) // %3
|
||||||
|
: "memory", "cc", "zmm0", "zmm5");
|
||||||
|
}
|
||||||
|
#endif // HAS_MIRRORROW_AVX512BW
|
||||||
|
|
||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
||||||
@ -4624,14 +4650,49 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
#endif // HAS_MIRRORROW_AVX2
|
#endif // HAS_MIRRORROW_AVX2
|
||||||
|
|
||||||
#ifdef HAS_MIRRORSPLITUVROW_AVX2
|
#if defined(HAS_MIRRORSPLITUVROW_AVX2) || defined(HAS_MIRRORSPLITUVROW_AVX512BW)
|
||||||
// Shuffle table for reversing the bytes of UV channels.
|
// Shuffle table for reversing the bytes of UV channels.
|
||||||
static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
|
static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
|
||||||
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
|
15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};
|
||||||
|
#endif
|
||||||
|
|
||||||
void MirrorSplitUVRow_AVX2(const uint8_t* src,
|
#ifdef HAS_MIRRORSPLITUVROW_AVX512BW
|
||||||
uint8_t* dst_u,
|
static const uint64_t kMirrorSplitUVPermute[8] = {6, 4, 2, 0, 7, 5, 3, 1};
|
||||||
uint8_t* dst_v,
|
|
||||||
|
void MirrorSplitUVRow_AVX512BW(const uint8_t* src, uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v, int width) {
|
||||||
|
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
||||||
|
asm volatile(
|
||||||
|
"vbroadcasti32x4 %4,%%zmm1 \n"
|
||||||
|
"lea -0x40(%0,%3,2),%0 \n"
|
||||||
|
"sub %1,%2 \n"
|
||||||
|
"vmovdqu64 %5,%%zmm3 \n"
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu8 (%0),%%zmm0 \n"
|
||||||
|
"lea -0x40(%0),%0 \n"
|
||||||
|
"vpshufb %%zmm1,%%zmm0,%%zmm0 \n"
|
||||||
|
"vpermq %%zmm0,%%zmm3,%%zmm0 \n"
|
||||||
|
"vextracti64x4 $0x1,%%zmm0,%%ymm2 \n"
|
||||||
|
"vmovdqu %%ymm0,(%1) \n"
|
||||||
|
"vmovdqu %%ymm2,0x00(%1,%2,1) \n"
|
||||||
|
"lea 0x20(%1),%1 \n"
|
||||||
|
"sub $0x20,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(temp_width) // %3
|
||||||
|
: "m"(kShuffleMirrorSplitUV), // %4
|
||||||
|
"m"(kMirrorSplitUVPermute) // %5
|
||||||
|
: "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3");
|
||||||
|
}
|
||||||
|
#endif // HAS_MIRRORSPLITUVROW_AVX512BW
|
||||||
|
|
||||||
|
#ifdef HAS_MIRRORSPLITUVROW_AVX2
|
||||||
|
void MirrorSplitUVRow_AVX2(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -4759,16 +4820,13 @@ void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
|
|||||||
#ifdef HAS_RGB24MIRRORROW_AVX2
|
#ifdef HAS_RGB24MIRRORROW_AVX2
|
||||||
// Shuffle first 10 pixels to last 10 mirrored. first byte zero
|
// Shuffle first 10 pixels to last 10 mirrored. first byte zero
|
||||||
static const uvec8 kShuffleMirrorRGB0_AVX = {
|
static const uvec8 kShuffleMirrorRGB0_AVX = {
|
||||||
128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u, 7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u
|
128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u, 7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u};
|
||||||
};
|
|
||||||
|
|
||||||
// Shuffle last 2 pixels to first 2 mirrored. last byte zero
|
// Shuffle last 2 pixels to first 2 mirrored. last byte zero
|
||||||
static const uvec8 kShuffleMirrorRGB1_AVX = {
|
static const uvec8 kShuffleMirrorRGB1_AVX = {
|
||||||
13u, 14u, 15u, 10u, 11u, 12u, 7u, 8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u
|
13u, 14u, 15u, 10u, 11u, 12u, 7u, 8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u};
|
||||||
};
|
|
||||||
|
|
||||||
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
|
||||||
uint8_t* dst_rgb24,
|
|
||||||
int width) {
|
int width) {
|
||||||
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
ptrdiff_t temp_width = (ptrdiff_t)(width);
|
||||||
src_rgb24 += width * 3 - 96;
|
src_rgb24 += width * 3 - 96;
|
||||||
@ -4801,9 +4859,9 @@ void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
|||||||
"sub $0x20,%2 \n"
|
"sub $0x20,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_rgb24), // %0
|
: "+r"(src_rgb24), // %0
|
||||||
"+r"(dst_rgb24), // %1
|
"+r"(dst_rgb24), // %1
|
||||||
"+r"(temp_width) // %2
|
"+r"(temp_width) // %2
|
||||||
: "m"(kShuffleMirrorRGB0_AVX), // %3
|
: "m"(kShuffleMirrorRGB0_AVX), // %3
|
||||||
"m"(kShuffleMirrorRGB1_AVX) // %4
|
"m"(kShuffleMirrorRGB1_AVX) // %4
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
@ -4894,6 +4952,45 @@ void SplitUVRow_AVX2(const uint8_t* src_uv,
|
|||||||
}
|
}
|
||||||
#endif // HAS_SPLITUVROW_AVX2
|
#endif // HAS_SPLITUVROW_AVX2
|
||||||
|
|
||||||
|
#ifdef HAS_SPLITUVROW_AVX512BW
|
||||||
|
// Qword indices for vpermq: even qwords first, then odd qwords. This undoes
// the per-128-bit-lane interleaving that vpackuswb leaves behind.
static const uint64_t kSplitUVPermute[8] = {0, 2, 4, 6, 1, 3, 5, 7};
|
||||||
|
|
||||||
|
// De-interleaves a row of byte pairs: the even (low) byte of every 16-bit pair
// goes to dst_u and the odd (high) byte goes to dst_v. Each loop iteration
// reads 128 bytes from src_uv and writes 64 bytes to each destination, then
// subtracts 64 from width.
// NOTE(review): the loop repeats while the remaining width is positive, so
// width is presumably expected to be a multiple of 64 - confirm against the
// caller.
void SplitUVRow_AVX512BW(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
|
asm volatile(
|
||||||
|
"vpternlogd $0xff,%%zmm5,%%zmm5,%%zmm5 \n" // all ones
|
||||||
|
"vpsrlw $0x8,%%zmm5,%%zmm5 \n" // 0x00ff in every 16-bit word
|
||||||
|
"vmovdqu64 %4,%%zmm4 \n" // qword permute indices
|
||||||
|
"sub %1,%2 \n" // dst_v becomes an offset from dst_u
|
||||||
|
|
||||||
|
LABELALIGN
|
||||||
|
"1: \n"
|
||||||
|
"vmovdqu8 (%0),%%zmm0 \n"
|
||||||
|
"vmovdqu8 0x40(%0),%%zmm1 \n"
|
||||||
|
"lea 0x80(%0),%0 \n" // src_uv += 128
|
||||||
|
"vpsrlw $0x8,%%zmm0,%%zmm2 \n" // high byte of each pair
|
||||||
|
"vpsrlw $0x8,%%zmm1,%%zmm3 \n"
|
||||||
|
"vpandd %%zmm5,%%zmm0,%%zmm0 \n" // low byte of each pair
|
||||||
|
"vpandd %%zmm5,%%zmm1,%%zmm1 \n"
|
||||||
|
"vpackuswb %%zmm1,%%zmm0,%%zmm0 \n" // words -> bytes, per 128-bit lane
|
||||||
|
"vpackuswb %%zmm3,%%zmm2,%%zmm2 \n"
|
||||||
|
"vpermq %%zmm0,%%zmm4,%%zmm0 \n" // restore linear byte order
|
||||||
|
"vpermq %%zmm2,%%zmm4,%%zmm2 \n"
|
||||||
|
"vmovdqu8 %%zmm0,(%1) \n" // 64 low bytes to dst_u
|
||||||
|
"vmovdqu8 %%zmm2,0x00(%1,%2,1) \n" // 64 high bytes to dst_v
|
||||||
|
"lea 0x40(%1),%1 \n" // advances both destinations
|
||||||
|
"sub $0x40,%3 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"vzeroupper \n"
|
||||||
|
: "+r"(src_uv), // %0
|
||||||
|
"+r"(dst_u), // %1
|
||||||
|
"+r"(dst_v), // %2
|
||||||
|
"+r"(width) // %3
|
||||||
|
: "m"(kSplitUVPermute) // %4
|
||||||
|
: "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5");
|
||||||
|
}
|
||||||
|
#endif // HAS_SPLITUVROW_AVX512BW
|
||||||
|
|
||||||
#ifdef HAS_SPLITUVROW_SSE2
|
#ifdef HAS_SPLITUVROW_SSE2
|
||||||
void SplitUVRow_SSE2(const uint8_t* src_uv,
|
void SplitUVRow_SSE2(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
@ -8765,10 +8862,8 @@ void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
|||||||
|
|
||||||
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
||||||
// Bilinear filter 16x2 -> 16x1
|
// Bilinear filter 16x2 -> 16x1
|
||||||
void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
void InterpolateRow_16_AVX2(uint16_t* dst_ptr, const uint16_t* src_ptr,
|
||||||
const uint16_t* src_ptr,
|
ptrdiff_t src_stride, int width,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
int width,
|
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"sub %1,%0 \n"
|
"sub %1,%0 \n"
|
||||||
@ -8783,10 +8878,14 @@ void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
|||||||
"vmovd %3,%%xmm5 \n"
|
"vmovd %3,%%xmm5 \n"
|
||||||
"vpunpcklwd %%xmm0,%%xmm5,%%xmm5 \n"
|
"vpunpcklwd %%xmm0,%%xmm5,%%xmm5 \n"
|
||||||
"vpbroadcastd %%xmm5,%%ymm5 \n"
|
"vpbroadcastd %%xmm5,%%ymm5 \n"
|
||||||
"mov $0x80008000,%%eax \n" // 0x80008000 used to bias unsigned words to signed range for vpmaddwd.
|
"mov $0x80008000,%%eax \n" // 0x80008000 used to bias
|
||||||
|
// unsigned words to
|
||||||
|
// signed range for
|
||||||
|
// vpmaddwd.
|
||||||
"vmovd %%eax,%%xmm4 \n"
|
"vmovd %%eax,%%xmm4 \n"
|
||||||
"vbroadcastss %%xmm4,%%ymm4 \n"
|
"vbroadcastss %%xmm4,%%ymm4 \n"
|
||||||
"mov $8388736,%%eax \n" // 32768 * 256 + 128 rounding constant.
|
"mov $8388736,%%eax \n" // 32768 * 256 + 128
|
||||||
|
// rounding constant.
|
||||||
"vmovd %%eax,%%xmm3 \n"
|
"vmovd %%eax,%%xmm3 \n"
|
||||||
"vbroadcastss %%xmm3,%%ymm3 \n"
|
"vbroadcastss %%xmm3,%%ymm3 \n"
|
||||||
|
|
||||||
@ -8811,8 +8910,7 @@ void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
|||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"jmp 99f \n"
|
"jmp 99f \n"
|
||||||
|
|
||||||
"50: \n"
|
"50: \n" LABELALIGN
|
||||||
LABELALIGN
|
|
||||||
"2: \n"
|
"2: \n"
|
||||||
"vmovdqu (%1),%%ymm0 \n"
|
"vmovdqu (%1),%%ymm0 \n"
|
||||||
"vpavgw (%1,%4,2),%%ymm0,%%ymm0 \n"
|
"vpavgw (%1,%4,2),%%ymm0,%%ymm0 \n"
|
||||||
@ -8822,8 +8920,7 @@ void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
|||||||
"jg 2b \n"
|
"jg 2b \n"
|
||||||
"jmp 99f \n"
|
"jmp 99f \n"
|
||||||
|
|
||||||
"100: \n"
|
"100: \n" LABELALIGN
|
||||||
LABELALIGN
|
|
||||||
"3: \n"
|
"3: \n"
|
||||||
"vmovdqu (%1),%%ymm0 \n"
|
"vmovdqu (%1),%%ymm0 \n"
|
||||||
"vmovdqu %%ymm0,0x00(%1,%0,1) \n"
|
"vmovdqu %%ymm0,0x00(%1,%0,1) \n"
|
||||||
@ -8901,31 +8998,28 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
|
|||||||
|
|
||||||
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
|
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
|
||||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||||
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb,
|
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
uint8_t* dst_argb,
|
const uint8_t* shuffler, int width) {
|
||||||
const uint8_t* shuffler,
|
asm volatile("vbroadcasti32x4 (%3),%%zmm5 \n"
|
||||||
int width) {
|
|
||||||
asm volatile(
|
|
||||||
"vbroadcasti32x4 (%3),%%zmm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu8 (%0),%%zmm0 \n"
|
"vmovdqu8 (%0),%%zmm0 \n"
|
||||||
"vmovdqu8 0x40(%0),%%zmm1 \n"
|
"vmovdqu8 0x40(%0),%%zmm1 \n"
|
||||||
"lea 0x80(%0),%0 \n"
|
"lea 0x80(%0),%0 \n"
|
||||||
"vpshufb %%zmm5,%%zmm0,%%zmm0 \n"
|
"vpshufb %%zmm5,%%zmm0,%%zmm0 \n"
|
||||||
"vpshufb %%zmm5,%%zmm1,%%zmm1 \n"
|
"vpshufb %%zmm5,%%zmm1,%%zmm1 \n"
|
||||||
"vmovdqu8 %%zmm0,(%1) \n"
|
"vmovdqu8 %%zmm0,(%1) \n"
|
||||||
"vmovdqu8 %%zmm1,0x40(%1) \n"
|
"vmovdqu8 %%zmm1,0x40(%1) \n"
|
||||||
"lea 0x80(%1),%1 \n"
|
"lea 0x80(%1),%1 \n"
|
||||||
"sub $0x20,%2 \n"
|
"sub $0x20,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(shuffler) // %3
|
: "r"(shuffler) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm5");
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBSHUFFLEROW_AVX512BW
|
#endif // HAS_ARGBSHUFFLEROW_AVX512BW
|
||||||
|
|
||||||
|
|||||||
@ -1887,13 +1887,13 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
|
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
|
||||||
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
|
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
: "r"(&c->kRGBToU), // %4
|
: "r"(&c->kRGBToU), // %4
|
||||||
"r"(&c->kRGBToV), // %5
|
"r"(&c->kRGBToV), // %5
|
||||||
"r"(&c->kAddUV) // %6
|
"r"(&c->kAddUV) // %6
|
||||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
|
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
|
||||||
"q10", "q11", "q12");
|
"q10", "q11", "q12");
|
||||||
}
|
}
|
||||||
@ -1934,8 +1934,9 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
||||||
asm volatile (
|
asm volatile(
|
||||||
"vld1.8 {d24}, [%5] \n" // load kRGBToU (8 bytes, only 4 used)
|
"vld1.8 {d24}, [%5] \n" // load kRGBToU (8 bytes,
|
||||||
|
// only 4 used)
|
||||||
"vld1.8 {d25}, [%6] \n" // load kRGBToV
|
"vld1.8 {d25}, [%6] \n" // load kRGBToV
|
||||||
"vmovl.s8 q14, d24 \n" // U coeffs in d28
|
"vmovl.s8 q14, d24 \n" // U coeffs in d28
|
||||||
"vmovl.s8 q15, d25 \n" // V coeffs in d30
|
"vmovl.s8 q15, d25 \n" // V coeffs in d30
|
||||||
@ -1943,7 +1944,8 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
|
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB
|
||||||
|
// pixels.
|
||||||
"subs %4, %4, #16 \n" // 16 processed per loop.
|
"subs %4, %4, #16 \n" // 16 processed per loop.
|
||||||
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
||||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||||
@ -1985,16 +1987,15 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
|
||||||
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb_1), // %1
|
"+r"(src_argb_1), // %1
|
||||||
"+r"(dst_u), // %2
|
"+r"(dst_u), // %2
|
||||||
"+r"(dst_v), // %3
|
"+r"(dst_v), // %3
|
||||||
"+r"(width) // %4
|
"+r"(width) // %4
|
||||||
: "r"(&c->kRGBToU), // %5
|
: "r"(&c->kRGBToU), // %5
|
||||||
"r"(&c->kRGBToV) // %6
|
"r"(&c->kRGBToV) // %6
|
||||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
|
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
|
||||||
"q8", "q9", "q11", "q12", "q14", "q15"
|
"q9", "q11", "q12", "q14", "q15");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||||
|
|||||||
@ -2736,26 +2736,26 @@ struct RgbUVConstants {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 8x1 pixels.
|
// 8x1 pixels.
|
||||||
void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
|
void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ldr q16, [%[c], #16] \n" // kRGBToU
|
"ldr q16, [%[c], #16] \n" // kRGBToU
|
||||||
"ldr q17, [%[c], #32] \n" // kRGBToV
|
"ldr q17, [%[c], #32] \n" // kRGBToV
|
||||||
"ldr s0, [%[c], #64] \n" // kAddUV
|
"ldr s0, [%[c], #64] \n" // kAddUV
|
||||||
"sxtl v16.8h, v16.8b \n" // sign extend U coeffs to 16-bit
|
"sxtl v16.8h, v16.8b \n" // sign extend U coeffs to
|
||||||
"sxtl v17.8h, v17.8b \n" // sign extend V coeffs to 16-bit
|
// 16-bit
|
||||||
"dup v20.8h, v16.h[0] \n" // U0
|
"sxtl v17.8h, v17.8b \n" // sign extend V coeffs to
|
||||||
"dup v21.8h, v16.h[1] \n" // U1
|
// 16-bit
|
||||||
"dup v22.8h, v16.h[2] \n" // U2
|
"dup v20.8h, v16.h[0] \n" // U0
|
||||||
"dup v23.8h, v16.h[3] \n" // U3
|
"dup v21.8h, v16.h[1] \n" // U1
|
||||||
"dup v24.8h, v17.h[0] \n" // V0
|
"dup v22.8h, v16.h[2] \n" // U2
|
||||||
"dup v26.8h, v17.h[1] \n" // V1
|
"dup v23.8h, v16.h[3] \n" // U3
|
||||||
"dup v27.8h, v17.h[2] \n" // V2
|
"dup v24.8h, v17.h[0] \n" // V0
|
||||||
"dup v28.8h, v17.h[3] \n" // V3
|
"dup v26.8h, v17.h[1] \n" // V1
|
||||||
"dup v25.8h, v0.h[0] \n" // kAddUV
|
"dup v27.8h, v17.h[2] \n" // V2
|
||||||
|
"dup v28.8h, v17.h[3] \n" // V3
|
||||||
|
"dup v25.8h, v0.h[0] \n" // kAddUV
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
|
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
|
||||||
"subs %w3, %w3, #8 \n" // 8 processed per loop.
|
"subs %w3, %w3, #8 \n" // 8 processed per loop.
|
||||||
@ -2783,27 +2783,25 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"st1 {v0.8b}, [%1], #8 \n"
|
"st1 {v0.8b}, [%1], #8 \n"
|
||||||
"st1 {v1.8b}, [%2], #8 \n"
|
"st1 {v1.8b}, [%2], #8 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_u), // %1
|
"+r"(dst_u), // %1
|
||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+r"(width) // %3
|
"+r"(width) // %3
|
||||||
: [c] "r"(c) // %4
|
: [c] "r"(c) // %4
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
|
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
|
||||||
"v26", "v27", "v28");
|
"v27", "v28");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ARGBToUV444MatrixRow_NEON_I8MM(
|
static void ARGBToUV444MatrixRow_NEON_I8MM(const uint8_t* src_argb,
|
||||||
const uint8_t* src_argb,
|
uint8_t* dst_u, uint8_t* dst_v,
|
||||||
uint8_t* dst_u,
|
int width,
|
||||||
uint8_t* dst_v,
|
const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"ldr q16, [%[c], #16] \n" // kRGBToU
|
"ldr q16, [%[c], #16] \n" // kRGBToU
|
||||||
"ldr q17, [%[c], #32] \n" // kRGBToV
|
"ldr q17, [%[c], #32] \n" // kRGBToV
|
||||||
"ldr s0, [%[c], #64] \n" // kAddUV
|
"ldr s0, [%[c], #64] \n" // kAddUV
|
||||||
"dup v29.8h, v0.h[0] \n" // 128.0
|
"dup v29.8h, v0.h[0] \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ldp q0, q1, [%[src]], #32 \n"
|
"ldp q0, q1, [%[src]], #32 \n"
|
||||||
"subs %w[width], %w[width], #8 \n" // 8 processed per loop.
|
"subs %w[width], %w[width], #8 \n" // 8 processed per loop.
|
||||||
@ -2823,11 +2821,11 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
|
|||||||
"str d0, [%[dst_u]], #8 \n" // store 8 pixels U.
|
"str d0, [%[dst_u]], #8 \n" // store 8 pixels U.
|
||||||
"str d1, [%[dst_v]], #8 \n" // store 8 pixels V.
|
"str d1, [%[dst_v]], #8 \n" // store 8 pixels V.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src] "+r"(src_argb), // %[src]
|
: [src] "+r"(src_argb), // %[src]
|
||||||
[dst_u] "+r"(dst_u), // %[dst_u]
|
[dst_u] "+r"(dst_u), // %[dst_u]
|
||||||
[dst_v] "+r"(dst_v), // %[dst_v]
|
[dst_v] "+r"(dst_v), // %[dst_v]
|
||||||
[width] "+r"(width) // %[width]
|
[width] "+r"(width) // %[width]
|
||||||
: [c] "r"(c) // %[c]
|
: [c] "r"(c) // %[c]
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17",
|
||||||
"v29");
|
"v29");
|
||||||
}
|
}
|
||||||
@ -2844,8 +2842,7 @@ void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
|
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbI601Constants);
|
||||||
&kArgbI601Constants);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
|
void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||||
@ -2860,8 +2857,7 @@ void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
|
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants);
|
||||||
&kArgbJPEGConstants);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
|
void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
|
||||||
@ -2903,23 +2899,27 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
||||||
asm volatile (
|
asm volatile(
|
||||||
"ldr q16, [%[c], #16] \n" // kRGBToU
|
"ldr q16, [%[c], #16] \n" // kRGBToU
|
||||||
"ldr q17, [%[c], #32] \n" // kRGBToV
|
"ldr q17, [%[c], #32] \n" // kRGBToV
|
||||||
"sxtl v16.8h, v16.8b \n" // sign extend U coeffs to 16-bit
|
"sxtl v16.8h, v16.8b \n" // sign extend U coeffs to
|
||||||
"sxtl v17.8h, v17.8b \n" // sign extend V coeffs to 16-bit
|
// 16-bit
|
||||||
"dup v20.8h, v16.h[0] \n" // U0
|
"sxtl v17.8h, v17.8b \n" // sign extend V coeffs to
|
||||||
"dup v21.8h, v16.h[1] \n" // U1
|
// 16-bit
|
||||||
"dup v22.8h, v16.h[2] \n" // U2
|
"dup v20.8h, v16.h[0] \n" // U0
|
||||||
"dup v23.8h, v16.h[3] \n" // U3
|
"dup v21.8h, v16.h[1] \n" // U1
|
||||||
"dup v24.8h, v17.h[0] \n" // V0
|
"dup v22.8h, v16.h[2] \n" // U2
|
||||||
"dup v26.8h, v17.h[1] \n" // V1
|
"dup v23.8h, v16.h[3] \n" // U3
|
||||||
"dup v27.8h, v17.h[2] \n" // V2
|
"dup v24.8h, v17.h[0] \n" // V0
|
||||||
"dup v28.8h, v17.h[3] \n" // V3
|
"dup v26.8h, v17.h[1] \n" // V1
|
||||||
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 in 16-bit (0x8000)
|
"dup v27.8h, v17.h[2] \n" // V2
|
||||||
|
"dup v28.8h, v17.h[3] \n" // V3
|
||||||
|
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 in 16-bit
|
||||||
|
// (0x8000)
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
|
||||||
|
// pixels.
|
||||||
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
||||||
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
|
"uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
|
||||||
"prfm pldl1keep, [%0, 448] \n"
|
"prfm pldl1keep, [%0, 448] \n"
|
||||||
@ -2927,7 +2927,8 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
"uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
|
||||||
"uaddlp v18.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
|
"uaddlp v18.8h, v3.16b \n" // A 16 bytes -> 8 shorts.
|
||||||
|
|
||||||
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more.
|
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16
|
||||||
|
// more.
|
||||||
"uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
|
"uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
|
||||||
"prfm pldl1keep, [%1, 448] \n"
|
"prfm pldl1keep, [%1, 448] \n"
|
||||||
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
|
||||||
@ -2958,16 +2959,15 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
|
||||||
"st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
|
"st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(src_argb_1), // %1
|
"+r"(src_argb_1), // %1
|
||||||
"+r"(dst_u), // %2
|
"+r"(dst_u), // %2
|
||||||
"+r"(dst_v), // %3
|
"+r"(dst_v), // %3
|
||||||
"+r"(width) // %4
|
"+r"(width) // %4
|
||||||
: [c] "r"(c) // %5
|
: [c] "r"(c) // %5
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||||
"v16", "v17", "v18", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
|
"v17", "v18", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
|
||||||
"v27", "v28"
|
"v28");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
void ARGBToUVRow_NEON(const uint8_t* src_argb,
|
||||||
@ -2988,29 +2988,20 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
|||||||
&kArgbJPEGConstants);
|
&kArgbJPEGConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVRow_NEON(const uint8_t* src_abgr,
|
void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr,
|
||||||
int src_stride_abgr,
|
uint8_t* dst_u, uint8_t* dst_v, int width) {
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width) {
|
|
||||||
ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
||||||
&kAbgrI601Constants);
|
&kAbgrI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra,
|
||||||
int src_stride_bgra,
|
uint8_t* dst_u, uint8_t* dst_v, int width) {
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width) {
|
|
||||||
ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width,
|
||||||
&kBgraI601Constants);
|
&kBgraI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToUVRow_NEON(const uint8_t* src_rgba,
|
void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba,
|
||||||
int src_stride_rgba,
|
uint8_t* dst_u, uint8_t* dst_v, int width) {
|
||||||
uint8_t* dst_u,
|
|
||||||
uint8_t* dst_v,
|
|
||||||
int width) {
|
|
||||||
ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width,
|
||||||
&kRgbaI601Constants);
|
&kRgbaI601Constants);
|
||||||
}
|
}
|
||||||
@ -3329,12 +3320,10 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the ArgbConstants layout.
|
// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the ArgbConstants layout.
|
||||||
static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src,
|
static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src, int src_stride,
|
||||||
int src_stride,
|
uint8_t* dst_u, uint8_t* dst_v,
|
||||||
uint8_t* dst_u,
|
int width,
|
||||||
uint8_t* dst_v,
|
const struct ArgbConstants* c) {
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
|
||||||
const uint8_t* src1 = src + src_stride;
|
const uint8_t* src1 = src + src_stride;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v23.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in
|
"movi v23.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in
|
||||||
@ -3388,12 +3377,12 @@ static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src,
|
|||||||
"str d0, [%[dst_u]], #8 \n" // store 8 pixels U
|
"str d0, [%[dst_u]], #8 \n" // store 8 pixels U
|
||||||
"str d1, [%[dst_v]], #8 \n" // store 8 pixels V
|
"str d1, [%[dst_v]], #8 \n" // store 8 pixels V
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src] "+r"(src), // %[src]
|
: [src] "+r"(src), // %[src]
|
||||||
[src1] "+r"(src1), // %[src1]
|
[src1] "+r"(src1), // %[src1]
|
||||||
[dst_u] "+r"(dst_u), // %[dst_u]
|
[dst_u] "+r"(dst_u), // %[dst_u]
|
||||||
[dst_v] "+r"(dst_v), // %[dst_v]
|
[dst_v] "+r"(dst_v), // %[dst_v]
|
||||||
[width] "+r"(width) // %[width]
|
[width] "+r"(width) // %[width]
|
||||||
: [c] "r"(c) // %[c]
|
: [c] "r"(c) // %[c]
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v23",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v23",
|
||||||
"v24", "v25");
|
"v24", "v25");
|
||||||
}
|
}
|
||||||
@ -3404,8 +3393,8 @@ void ARGBToUVMatrixRow_NEON_I8MM(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width,
|
int width,
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v,
|
||||||
c);
|
width, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb,
|
void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb,
|
||||||
@ -3413,8 +3402,8 @@ void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v,
|
||||||
&kArgbI601Constants);
|
width, &kArgbI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr,
|
void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr,
|
||||||
@ -3422,8 +3411,8 @@ void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v,
|
||||||
&kAbgrI601Constants);
|
width, &kAbgrI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra,
|
void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra,
|
||||||
@ -3431,8 +3420,8 @@ void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_bgra, src_stride_bgra, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_bgra, src_stride_bgra, dst_u, dst_v,
|
||||||
&kBgraI601Constants);
|
width, &kBgraI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba,
|
void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba,
|
||||||
@ -3440,8 +3429,8 @@ void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_rgba, src_stride_rgba, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_rgba, src_stride_rgba, dst_u, dst_v,
|
||||||
&kRgbaI601Constants);
|
width, &kRgbaI601Constants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToUVJRow_NEON_I8MM(const uint8_t* src_argb,
|
void ARGBToUVJRow_NEON_I8MM(const uint8_t* src_argb,
|
||||||
@ -3449,8 +3438,8 @@ void ARGBToUVJRow_NEON_I8MM(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v,
|
||||||
&kArgbJPEGConstants);
|
width, &kArgbJPEGConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr,
|
void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr,
|
||||||
@ -3458,8 +3447,8 @@ void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width) {
|
int width) {
|
||||||
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width,
|
ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v,
|
||||||
&kAbgrJPEGConstants);
|
width, &kAbgrJPEGConstants);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
||||||
@ -3589,15 +3578,14 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
|
|||||||
"addhn v1.8b, v1.8h, v22.8h \n"
|
"addhn v1.8b, v1.8h, v22.8h \n"
|
||||||
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(c) // %3
|
: "r"(c) // %3
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18",
|
||||||
"v19", "v20", "v21", "v22");
|
"v19", "v20", "v21", "v22");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ARGBToYMatrixRow_NEON_DotProd(
|
void ARGBToYMatrixRow_NEON_DotProd(
|
||||||
const uint8_t* src_argb,
|
const uint8_t* src_argb,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
@ -3625,14 +3613,14 @@ void ARGBToYMatrixRow_NEON_DotProd(
|
|||||||
"addhn v1.8b, v1.8h, v19.8h \n"
|
"addhn v1.8b, v1.8h, v19.8h \n"
|
||||||
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(c) // %3
|
: "r"(c) // %3
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19");
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||||
|
"v17", "v18", "v19");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// RGB to JPeg coefficients
|
// RGB to JPeg coefficients
|
||||||
|
|
||||||
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||||
@ -3732,10 +3720,10 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb,
|
|||||||
"addhn v1.8b, v1.8h, v21.8h \n"
|
"addhn v1.8b, v1.8h, v21.8h \n"
|
||||||
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
"st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_rgb), // %0
|
: "+r"(src_rgb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(c) // %3
|
: "r"(c) // %3
|
||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v16", "v17", "v18",
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v16", "v17", "v18",
|
||||||
"v19", "v20", "v21");
|
"v19", "v20", "v21");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -116,10 +116,8 @@ extern "C" {
|
|||||||
// Convert 32 ARGB pixels (128 bytes) to 32 UV444 values.
|
// Convert 32 ARGB pixels (128 bytes) to 32 UV444 values.
|
||||||
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) || defined(HAS_ARGBTOUV444MATRIXROW_AVX2)
|
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) || defined(HAS_ARGBTOUV444MATRIXROW_AVX2)
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
|
void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width,
|
||||||
uint8_t* dst_v,
|
|
||||||
int width,
|
|
||||||
const struct ArgbConstants* c) {
|
const struct ArgbConstants* c) {
|
||||||
__m256i ymm_u =
|
__m256i ymm_u =
|
||||||
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU));
|
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU));
|
||||||
@ -455,8 +453,8 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
|
|||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
__m256i ymm_shuf =
|
__m256i ymm_shuf = _mm256_broadcastsi128_si256(
|
||||||
_mm256_broadcastsi128_si256(_mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
|
_mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
|
||||||
src += width;
|
src += width;
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
src -= 32;
|
src -= 32;
|
||||||
@ -473,8 +471,8 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
#ifdef HAS_MIRRORUVROW_AVX2
|
#ifdef HAS_MIRRORUVROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
||||||
__m256i ymm_shuf =
|
__m256i ymm_shuf = _mm256_broadcastsi128_si256(
|
||||||
_mm256_broadcastsi128_si256(_mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1));
|
_mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1));
|
||||||
src_uv += width * 2;
|
src_uv += width * 2;
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
src_uv -= 32;
|
src_uv -= 32;
|
||||||
@ -490,12 +488,10 @@ void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
|||||||
|
|
||||||
#ifdef HAS_MIRRORSPLITUVROW_AVX2
|
#ifdef HAS_MIRRORSPLITUVROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void MirrorSplitUVRow_AVX2(const uint8_t* src_uv,
|
void MirrorSplitUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_u,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_v, int width) {
|
||||||
uint8_t* dst_v,
|
__m256i ymm_shuf = _mm256_broadcastsi128_si256(
|
||||||
int width) {
|
_mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1));
|
||||||
__m256i ymm_shuf =
|
|
||||||
_mm256_broadcastsi128_si256(_mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1));
|
|
||||||
src_uv += width * 2;
|
src_uv += width * 2;
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
src_uv -= 32;
|
src_uv -= 32;
|
||||||
@ -513,28 +509,30 @@ void MirrorSplitUVRow_AVX2(const uint8_t* src_uv,
|
|||||||
|
|
||||||
#ifdef HAS_RGB24MIRRORROW_AVX2
|
#ifdef HAS_RGB24MIRRORROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_rgb24,
|
||||||
uint8_t* dst_rgb24,
|
|
||||||
int width) {
|
int width) {
|
||||||
__m256i shuf0 = _mm256_setr_epi8(
|
__m256i shuf0 =
|
||||||
-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2,
|
_mm256_setr_epi8(-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2, -1,
|
||||||
-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2);
|
12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2);
|
||||||
__m128i shuf1 = _mm_setr_epi8(
|
__m128i shuf1 =
|
||||||
13, 14, 15, 10, 11, 12, 7, 8, 9, 4, 5, 6, 1, 2, 3, -1);
|
_mm_setr_epi8(13, 14, 15, 10, 11, 12, 7, 8, 9, 4, 5, 6, 1, 2, 3, -1);
|
||||||
|
|
||||||
src_rgb24 += width * 3 - 96;
|
src_rgb24 += width * 3 - 96;
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
__m128i v0_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 0));
|
__m128i v0_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 0));
|
||||||
__m128i v0_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 15));
|
__m128i v0_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 15));
|
||||||
__m256i v0 = _mm256_inserti128_si256(_mm256_castsi128_si256(v0_lo), v0_hi, 1);
|
__m256i v0 =
|
||||||
|
_mm256_inserti128_si256(_mm256_castsi128_si256(v0_lo), v0_hi, 1);
|
||||||
|
|
||||||
__m128i v1_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 30));
|
__m128i v1_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 30));
|
||||||
__m128i v1_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 45));
|
__m128i v1_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 45));
|
||||||
__m256i v1 = _mm256_inserti128_si256(_mm256_castsi128_si256(v1_lo), v1_hi, 1);
|
__m256i v1 =
|
||||||
|
_mm256_inserti128_si256(_mm256_castsi128_si256(v1_lo), v1_hi, 1);
|
||||||
|
|
||||||
__m128i v2_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 60));
|
__m128i v2_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 60));
|
||||||
__m128i v2_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 75));
|
__m128i v2_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 75));
|
||||||
__m256i v2 = _mm256_inserti128_si256(_mm256_castsi128_si256(v2_lo), v2_hi, 1);
|
__m256i v2 =
|
||||||
|
_mm256_inserti128_si256(_mm256_castsi128_si256(v2_lo), v2_hi, 1);
|
||||||
|
|
||||||
__m128i v3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 80));
|
__m128i v3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 80));
|
||||||
|
|
||||||
@ -544,11 +542,14 @@ void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
|||||||
v3 = _mm_shuffle_epi8(v3, shuf1);
|
v3 = _mm_shuffle_epi8(v3, shuf1);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 80), _mm256_castsi256_si128(v0));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 80), _mm256_castsi256_si128(v0));
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 65), _mm256_extracti128_si256(v0, 1));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 65),
|
||||||
|
_mm256_extracti128_si256(v0, 1));
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 50), _mm256_castsi256_si128(v1));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 50), _mm256_castsi256_si128(v1));
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 35), _mm256_extracti128_si256(v1, 1));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 35),
|
||||||
|
_mm256_extracti128_si256(v1, 1));
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 20), _mm256_castsi256_si128(v2));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 20), _mm256_castsi256_si128(v2));
|
||||||
_mm_storeu_si128((__m128i*)(dst_rgb24 + 5), _mm256_extracti128_si256(v2, 1));
|
_mm_storeu_si128((__m128i*)(dst_rgb24 + 5),
|
||||||
|
_mm256_extracti128_si256(v2, 1));
|
||||||
_mm_storel_epi64((__m128i*)(dst_rgb24 + 0), v3);
|
_mm_storel_epi64((__m128i*)(dst_rgb24 + 0), v3);
|
||||||
|
|
||||||
src_rgb24 -= 96;
|
src_rgb24 -= 96;
|
||||||
@ -560,10 +561,8 @@ void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
|
|||||||
|
|
||||||
#ifdef HAS_INTERPOLATEROW_AVX2
|
#ifdef HAS_INTERPOLATEROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
void InterpolateRow_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr,
|
||||||
const uint8_t* src_ptr,
|
ptrdiff_t src_stride, int width,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
int width,
|
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
int y1 = source_y_fraction;
|
int y1 = source_y_fraction;
|
||||||
int y0 = 256 - y1;
|
int y0 = 256 - y1;
|
||||||
@ -607,10 +606,8 @@ void InterpolateRow_AVX2(uint8_t* dst_ptr,
|
|||||||
|
|
||||||
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
#ifdef HAS_INTERPOLATEROW_16_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
void InterpolateRow_16_AVX2(uint16_t* dst_ptr, const uint16_t* src_ptr,
|
||||||
const uint16_t* src_ptr,
|
ptrdiff_t src_stride, int width,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
int width,
|
|
||||||
int source_y_fraction) {
|
int source_y_fraction) {
|
||||||
int y1 = source_y_fraction;
|
int y1 = source_y_fraction;
|
||||||
int y0 = 256 - y1;
|
int y0 = 256 - y1;
|
||||||
@ -629,7 +626,8 @@ void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
|
|||||||
for (i = 0; i < width; i += 16) {
|
for (i = 0; i < width; i += 16) {
|
||||||
__m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
|
__m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
|
||||||
__m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
|
__m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
|
||||||
_mm256_storeu_si256((__m256i*)(dst_ptr + i), _mm256_avg_epu16(row0, row1));
|
_mm256_storeu_si256((__m256i*)(dst_ptr + i),
|
||||||
|
_mm256_avg_epu16(row0, row1));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < width; i += 16) {
|
for (i = 0; i < width; i += 16) {
|
||||||
@ -672,21 +670,23 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
#ifdef HAS_J400TOARGBROW_AVX2
|
#ifdef HAS_J400TOARGBROW_AVX2
|
||||||
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_0[32] = {
|
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_0[32] = {
|
||||||
0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u, 3u, 128u,
|
0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u, 3u, 128u,
|
||||||
4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u, 7u, 7u, 7u, 128u
|
4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u, 7u, 7u, 7u, 128u};
|
||||||
};
|
|
||||||
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_1[32] = {
|
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_1[32] = {
|
||||||
8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u, 10u, 128u, 11u, 11u, 11u, 128u,
|
8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u, 10u,
|
||||||
12u, 12u, 12u, 128u, 13u, 13u, 13u, 128u, 14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u
|
128u, 11u, 11u, 11u, 128u, 12u, 12u, 12u, 128u, 13u, 13u,
|
||||||
};
|
13u, 128u, 14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u};
|
||||||
|
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
||||||
__m256i ymm_mask0 = _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_0);
|
__m256i ymm_mask0 =
|
||||||
__m256i ymm_mask1 = _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_1);
|
_mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_0);
|
||||||
|
__m256i ymm_mask1 =
|
||||||
|
_mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_1);
|
||||||
__m256i ymm_alpha = _mm256_set1_epi32((int)0xff000000u);
|
__m256i ymm_alpha = _mm256_set1_epi32((int)0xff000000u);
|
||||||
|
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
__m256i ymm0 = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)src_y));
|
__m256i ymm0 =
|
||||||
|
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)src_y));
|
||||||
|
|
||||||
__m256i ymm1 = _mm256_shuffle_epi8(ymm0, ymm_mask0);
|
__m256i ymm1 = _mm256_shuffle_epi8(ymm0, ymm_mask0);
|
||||||
__m256i ymm2 = _mm256_shuffle_epi8(ymm0, ymm_mask1);
|
__m256i ymm2 = _mm256_shuffle_epi8(ymm0, ymm_mask1);
|
||||||
@ -707,13 +707,14 @@ void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
|
|||||||
#ifdef HAS_RGB24TOARGBROW_AVX2
|
#ifdef HAS_RGB24TOARGBROW_AVX2
|
||||||
alignas(16) static const uint8_t kShuffleMaskRGB24ToARGB[2][16] = {
|
alignas(16) static const uint8_t kShuffleMaskRGB24ToARGB[2][16] = {
|
||||||
{0u, 1u, 2u, 128u, 3u, 4u, 5u, 128u, 6u, 7u, 8u, 128u, 9u, 10u, 11u, 128u},
|
{0u, 1u, 2u, 128u, 3u, 4u, 5u, 128u, 6u, 7u, 8u, 128u, 9u, 10u, 11u, 128u},
|
||||||
{4u, 5u, 6u, 128u, 7u, 8u, 9u, 128u, 10u, 11u, 12u, 128u, 13u, 14u, 15u, 128u}
|
{4u, 5u, 6u, 128u, 7u, 8u, 9u, 128u, 10u, 11u, 12u, 128u, 13u, 14u, 15u,
|
||||||
};
|
128u}};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAS_RGB565TOARGBROW_AVX2
|
#ifdef HAS_RGB565TOARGBROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) {
|
void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
|
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
|
||||||
__m256i ymm_scale_g = _mm256_set1_epi32(0x20802080);
|
__m256i ymm_scale_g = _mm256_set1_epi32(0x20802080);
|
||||||
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
|
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
|
||||||
@ -730,11 +731,11 @@ void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int widt
|
|||||||
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
|
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
|
||||||
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
|
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
|
||||||
ymm1 = _mm256_slli_epi16(ymm1, 8);
|
ymm1 = _mm256_slli_epi16(ymm1, 8);
|
||||||
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
|
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
|
||||||
|
|
||||||
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
|
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
|
||||||
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
|
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
|
||||||
ymm0 = _mm256_or_si256(ymm0, ymm_mask_a); // GA
|
ymm0 = _mm256_or_si256(ymm0, ymm_mask_a); // GA
|
||||||
|
|
||||||
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
|
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
|
||||||
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
|
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
|
||||||
@ -755,7 +756,8 @@ void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int widt
|
|||||||
|
|
||||||
#ifdef HAS_ARGB1555TOARGBROW_AVX2
|
#ifdef HAS_ARGB1555TOARGBROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) {
|
void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
|
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
|
||||||
__m256i ymm_scale_g = _mm256_set1_epi32(0x42004200);
|
__m256i ymm_scale_g = _mm256_set1_epi32(0x42004200);
|
||||||
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
|
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
|
||||||
@ -773,14 +775,14 @@ void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb, int
|
|||||||
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
|
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
|
||||||
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
|
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
|
||||||
ymm1 = _mm256_slli_epi16(ymm1, 8);
|
ymm1 = _mm256_slli_epi16(ymm1, 8);
|
||||||
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
|
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
|
||||||
|
|
||||||
ymm2 = ymm0;
|
ymm2 = ymm0;
|
||||||
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
|
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
|
||||||
ymm2 = _mm256_srai_epi16(ymm2, 8);
|
ymm2 = _mm256_srai_epi16(ymm2, 8);
|
||||||
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
|
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
|
||||||
ymm2 = _mm256_and_si256(ymm2, ymm_mask_a);
|
ymm2 = _mm256_and_si256(ymm2, ymm_mask_a);
|
||||||
ymm0 = _mm256_or_si256(ymm0, ymm2); // GA
|
ymm0 = _mm256_or_si256(ymm0, ymm2); // GA
|
||||||
|
|
||||||
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
|
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
|
||||||
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
|
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
|
||||||
@ -801,7 +803,8 @@ void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb, int
|
|||||||
|
|
||||||
#ifdef HAS_ARGB4444TOARGBROW_AVX2
|
#ifdef HAS_ARGB4444TOARGBROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) {
|
void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
__m256i ymm_mask = _mm256_set1_epi32(0x0f0f0f0f);
|
__m256i ymm_mask = _mm256_set1_epi32(0x0f0f0f0f);
|
||||||
__m256i ymm_mask2 = _mm256_slli_epi32(ymm_mask, 4);
|
__m256i ymm_mask2 = _mm256_slli_epi32(ymm_mask, 4);
|
||||||
|
|
||||||
@ -841,27 +844,34 @@ void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_argb, int
|
|||||||
|
|
||||||
#ifdef HAS_RGB24TOARGBROW_AVX2
|
#ifdef HAS_RGB24TOARGBROW_AVX2
|
||||||
LIBYUV_TARGET_AVX2
|
LIBYUV_TARGET_AVX2
|
||||||
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
|
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
__m256i ymm_alpha = _mm256_set1_epi32(0xff000000);
|
__m256i ymm_alpha = _mm256_set1_epi32(0xff000000);
|
||||||
__m256i ymm_shuf = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[0]));
|
__m256i ymm_shuf = _mm256_broadcastsi128_si256(
|
||||||
__m256i ymm_shuf2 = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[1]));
|
_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[0]));
|
||||||
|
__m256i ymm_shuf2 = _mm256_broadcastsi128_si256(
|
||||||
|
_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[1]));
|
||||||
|
|
||||||
while (width > 0) {
|
while (width > 0) {
|
||||||
__m128i xmm0 = _mm_loadu_si128((const __m128i*)src_rgb24);
|
__m128i xmm0 = _mm_loadu_si128((const __m128i*)src_rgb24);
|
||||||
__m256i ymm0 = _mm256_castsi128_si256(xmm0);
|
__m256i ymm0 = _mm256_castsi128_si256(xmm0);
|
||||||
ymm0 = _mm256_inserti128_si256(ymm0, _mm_loadu_si128((const __m128i*)(src_rgb24 + 12)), 1);
|
ymm0 = _mm256_inserti128_si256(
|
||||||
|
ymm0, _mm_loadu_si128((const __m128i*)(src_rgb24 + 12)), 1);
|
||||||
|
|
||||||
__m128i xmm1 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 24));
|
__m128i xmm1 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 24));
|
||||||
__m256i ymm1 = _mm256_castsi128_si256(xmm1);
|
__m256i ymm1 = _mm256_castsi128_si256(xmm1);
|
||||||
ymm1 = _mm256_inserti128_si256(ymm1, _mm_loadu_si128((const __m128i*)(src_rgb24 + 36)), 1);
|
ymm1 = _mm256_inserti128_si256(
|
||||||
|
ymm1, _mm_loadu_si128((const __m128i*)(src_rgb24 + 36)), 1);
|
||||||
|
|
||||||
__m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 48));
|
__m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 48));
|
||||||
__m256i ymm2 = _mm256_castsi128_si256(xmm2);
|
__m256i ymm2 = _mm256_castsi128_si256(xmm2);
|
||||||
ymm2 = _mm256_inserti128_si256(ymm2, _mm_loadu_si128((const __m128i*)(src_rgb24 + 60)), 1);
|
ymm2 = _mm256_inserti128_si256(
|
||||||
|
ymm2, _mm_loadu_si128((const __m128i*)(src_rgb24 + 60)), 1);
|
||||||
|
|
||||||
__m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 68));
|
__m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 68));
|
||||||
__m256i ymm3 = _mm256_castsi128_si256(xmm3);
|
__m256i ymm3 = _mm256_castsi128_si256(xmm3);
|
||||||
ymm3 = _mm256_inserti128_si256(ymm3, _mm_loadu_si128((const __m128i*)(src_rgb24 + 80)), 1);
|
ymm3 = _mm256_inserti128_si256(
|
||||||
|
ymm3, _mm_loadu_si128((const __m128i*)(src_rgb24 + 80)), 1);
|
||||||
|
|
||||||
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
|
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
|
||||||
ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf);
|
ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf);
|
||||||
@ -886,6 +896,46 @@ void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_argb, int width)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBSHUFFLEROW_AVX2
|
||||||
|
LIBYUV_TARGET_AVX2
|
||||||
|
void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
|
const uint8_t* shuffler, int width) {
|
||||||
|
__m256i control =
|
||||||
|
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)shuffler));
|
||||||
|
while (width >= 16) {
|
||||||
|
__m256i row = _mm256_loadu_si256((const __m256i*)src_argb);
|
||||||
|
__m256i row1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32));
|
||||||
|
row = _mm256_shuffle_epi8(row, control);
|
||||||
|
row1 = _mm256_shuffle_epi8(row1, control);
|
||||||
|
_mm256_storeu_si256((__m256i*)dst_argb, row);
|
||||||
|
_mm256_storeu_si256((__m256i*)(dst_argb + 32), row1);
|
||||||
|
src_argb += 64;
|
||||||
|
dst_argb += 64;
|
||||||
|
width -= 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
|
||||||
|
LIBYUV_TARGET_AVX512BW
|
||||||
|
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
|
const uint8_t* shuffler, int width) {
|
||||||
|
__m512i control =
|
||||||
|
_mm512_broadcast_i32x4(_mm_loadu_si128((const __m128i*)shuffler));
|
||||||
|
while (width >= 32) {
|
||||||
|
__m512i row = _mm512_loadu_si512((const __m512i*)src_argb);
|
||||||
|
__m512i row1 = _mm512_loadu_si512((const __m512i*)(src_argb + 64));
|
||||||
|
row = _mm512_shuffle_epi8(row, control);
|
||||||
|
row1 = _mm512_shuffle_epi8(row1, control);
|
||||||
|
_mm512_storeu_si512((__m512i*)dst_argb, row);
|
||||||
|
_mm512_storeu_si512((__m512i*)(dst_argb + 64), row1);
|
||||||
|
src_argb += 128;
|
||||||
|
dst_argb += 128;
|
||||||
|
width -= 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
260
source/scale.cc
260
source/scale.cc
@ -36,15 +36,10 @@ static __inline int Abs(int v) {
|
|||||||
// This is an optimized version for scaling down a plane to 1/2 of
|
// This is an optimized version for scaling down a plane to 1/2 of
|
||||||
// its original size.
|
// its original size.
|
||||||
|
|
||||||
static void ScalePlaneDown2(int src_width,
|
static void ScalePlaneDown2(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr, int dst_width) =
|
uint8_t* dst_ptr, int dst_width) =
|
||||||
@ -148,15 +143,10 @@ static void ScalePlaneDown2(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneDown2_16(int src_width,
|
static void ScalePlaneDown2_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
uint16_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr, int dst_width) =
|
uint16_t* dst_ptr, int dst_width) =
|
||||||
@ -256,15 +246,10 @@ void ScalePlaneDown2_16To8(int src_width,
|
|||||||
// This is an optimized version for scaling down a plane to 1/4 of
|
// This is an optimized version for scaling down a plane to 1/4 of
|
||||||
// its original size.
|
// its original size.
|
||||||
|
|
||||||
static void ScalePlaneDown4(int src_width,
|
static void ScalePlaneDown4(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr, int dst_width) =
|
uint8_t* dst_ptr, int dst_width) =
|
||||||
@ -328,15 +313,10 @@ static void ScalePlaneDown4(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneDown4_16(int src_width,
|
static void ScalePlaneDown4_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
uint16_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr, int dst_width) =
|
uint16_t* dst_ptr, int dst_width) =
|
||||||
@ -372,15 +352,10 @@ static void ScalePlaneDown4_16(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scale plane down, 3/4
|
// Scale plane down, 3/4
|
||||||
static void ScalePlaneDown34(int src_width,
|
static void ScalePlaneDown34(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr, int dst_width);
|
uint8_t* dst_ptr, int dst_width);
|
||||||
@ -499,15 +474,10 @@ static void ScalePlaneDown34(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneDown34_16(int src_width,
|
static void ScalePlaneDown34_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
uint16_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr, int dst_width);
|
uint16_t* dst_ptr, int dst_width);
|
||||||
@ -585,15 +555,10 @@ static void ScalePlaneDown34_16(int src_width,
|
|||||||
// ggghhhii
|
// ggghhhii
|
||||||
// Boxes are 3x3, 2x3, 3x2 and 2x2
|
// Boxes are 3x3, 2x3, 3x2 and 2x2
|
||||||
|
|
||||||
static void ScalePlaneDown38(int src_width,
|
static void ScalePlaneDown38(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr, int dst_width);
|
uint8_t* dst_ptr, int dst_width);
|
||||||
@ -705,15 +670,10 @@ static void ScalePlaneDown38(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneDown38_16(int src_width,
|
static void ScalePlaneDown38_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
uint16_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int y;
|
int y;
|
||||||
void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint16_t* dst_ptr, int dst_width);
|
uint16_t* dst_ptr, int dst_width);
|
||||||
@ -898,13 +858,9 @@ static void ScaleAddCols1_16_C(int dst_width,
|
|||||||
// one pixel of destination using fixed point (16.16) to step
|
// one pixel of destination using fixed point (16.16) to step
|
||||||
// through source, sampling a box of pixel with simple
|
// through source, sampling a box of pixel with simple
|
||||||
// averaging.
|
// averaging.
|
||||||
static int ScalePlaneBox(int src_width,
|
static int ScalePlaneBox(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr) {
|
uint8_t* dst_ptr) {
|
||||||
int j, k;
|
int j, k;
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
@ -987,13 +943,9 @@ static int ScalePlaneBox(int src_width,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ScalePlaneBox_16(int src_width,
|
static int ScalePlaneBox_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr) {
|
uint16_t* dst_ptr) {
|
||||||
int j, k;
|
int j, k;
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
@ -1045,15 +997,10 @@ static int ScalePlaneBox_16(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scale plane down with bilinear interpolation.
|
// Scale plane down with bilinear interpolation.
|
||||||
static int ScalePlaneBilinearDown(int src_width,
|
static int ScalePlaneBilinearDown(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
int x = 0;
|
int x = 0;
|
||||||
int y = 0;
|
int y = 0;
|
||||||
@ -1157,14 +1104,10 @@ static int ScalePlaneBilinearDown(int src_width,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ScalePlaneBilinearDown_16(int src_width,
|
static int ScalePlaneBilinearDown_16(int src_width, int src_height,
|
||||||
int src_height,
|
int dst_width, int dst_height,
|
||||||
int dst_width,
|
ptrdiff_t src_stride, ptrdiff_t dst_stride,
|
||||||
int dst_height,
|
const uint16_t* src_ptr, uint16_t* dst_ptr,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
int x = 0;
|
int x = 0;
|
||||||
@ -1249,15 +1192,10 @@ static int ScalePlaneBilinearDown_16(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scale up down with bilinear interpolation.
|
// Scale up down with bilinear interpolation.
|
||||||
static int ScalePlaneBilinearUp(int src_width,
|
static int ScalePlaneBilinearUp(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
uint8_t* dst_ptr, enum FilterMode filtering) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int j;
|
int j;
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
int x = 0;
|
int x = 0;
|
||||||
@ -1398,13 +1336,9 @@ static int ScalePlaneBilinearUp(int src_width,
|
|||||||
// This is an optimized version for scaling up a plane to 2 times of
|
// This is an optimized version for scaling up a plane to 2 times of
|
||||||
// its original width, using linear interpolation.
|
// its original width, using linear interpolation.
|
||||||
// This is used to scale U and V planes of I422 to I444.
|
// This is used to scale U and V planes of I422 to I444.
|
||||||
static void ScalePlaneUp2_Linear(int src_width,
|
static void ScalePlaneUp2_Linear(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr) {
|
uint8_t* dst_ptr) {
|
||||||
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
|
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
|
||||||
ScaleRowUp2_Linear_Any_C;
|
ScaleRowUp2_Linear_Any_C;
|
||||||
@ -1463,13 +1397,9 @@ static void ScalePlaneUp2_Linear(int src_width,
|
|||||||
// This is an optimized version for scaling up a plane to 2 times of
|
// This is an optimized version for scaling up a plane to 2 times of
|
||||||
// its original size, using bilinear interpolation.
|
// its original size, using bilinear interpolation.
|
||||||
// This is used to scale U and V planes of I420 to I444.
|
// This is used to scale U and V planes of I420 to I444.
|
||||||
static void ScalePlaneUp2_Bilinear(int src_width,
|
static void ScalePlaneUp2_Bilinear(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr) {
|
uint8_t* dst_ptr) {
|
||||||
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
|
||||||
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
|
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
|
||||||
@ -1529,12 +1459,9 @@ static void ScalePlaneUp2_Bilinear(int src_width,
|
|||||||
// its original width, using linear interpolation.
|
// its original width, using linear interpolation.
|
||||||
// stride is in count of uint16_t.
|
// stride is in count of uint16_t.
|
||||||
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
|
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
|
||||||
static void ScalePlaneUp2_12_Linear(int src_width,
|
static void ScalePlaneUp2_12_Linear(int src_width, int src_height,
|
||||||
int src_height,
|
int dst_width, int dst_height,
|
||||||
int dst_width,
|
ptrdiff_t src_stride, ptrdiff_t dst_stride,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr) {
|
uint16_t* dst_ptr) {
|
||||||
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
|
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
|
||||||
@ -1584,10 +1511,8 @@ static void ScalePlaneUp2_12_Linear(int src_width,
|
|||||||
// its original size, using bilinear interpolation.
|
// its original size, using bilinear interpolation.
|
||||||
// stride is in count of uint16_t.
|
// stride is in count of uint16_t.
|
||||||
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
|
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
|
||||||
static void ScalePlaneUp2_12_Bilinear(int src_width,
|
static void ScalePlaneUp2_12_Bilinear(int src_width, int src_height,
|
||||||
int src_height,
|
int dst_width, int dst_height,
|
||||||
int dst_width,
|
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr,
|
||||||
@ -1632,12 +1557,9 @@ static void ScalePlaneUp2_12_Bilinear(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneUp2_16_Linear(int src_width,
|
static void ScalePlaneUp2_16_Linear(int src_width, int src_height,
|
||||||
int src_height,
|
int dst_width, int dst_height,
|
||||||
int dst_width,
|
ptrdiff_t src_stride, ptrdiff_t dst_stride,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr,
|
||||||
uint16_t* dst_ptr) {
|
uint16_t* dst_ptr) {
|
||||||
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
|
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
|
||||||
@ -1682,10 +1604,8 @@ static void ScalePlaneUp2_16_Linear(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneUp2_16_Bilinear(int src_width,
|
static void ScalePlaneUp2_16_Bilinear(int src_width, int src_height,
|
||||||
int src_height,
|
int dst_width, int dst_height,
|
||||||
int dst_width,
|
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr,
|
||||||
@ -1730,14 +1650,10 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ScalePlaneBilinearUp_16(int src_width,
|
static int ScalePlaneBilinearUp_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
ptrdiff_t dst_stride,
|
||||||
const uint16_t* src_ptr,
|
const uint16_t* src_ptr, uint16_t* dst_ptr,
|
||||||
uint16_t* dst_ptr,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
@ -1864,13 +1780,9 @@ static int ScalePlaneBilinearUp_16(int src_width,
|
|||||||
// of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
// the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
|
|
||||||
static void ScalePlaneSimple(int src_width,
|
static void ScalePlaneSimple(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_ptr,
|
|
||||||
uint8_t* dst_ptr) {
|
uint8_t* dst_ptr) {
|
||||||
int i;
|
int i;
|
||||||
void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
|
void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
|
||||||
@ -1900,13 +1812,9 @@ static void ScalePlaneSimple(int src_width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ScalePlaneSimple_16(int src_width,
|
static void ScalePlaneSimple_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_ptr,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_ptr,
|
|
||||||
uint16_t* dst_ptr) {
|
uint16_t* dst_ptr) {
|
||||||
int i;
|
int i;
|
||||||
void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
|
void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
|
||||||
@ -1951,9 +1859,9 @@ int ScalePlane(const uint8_t* src,
|
|||||||
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
||||||
// This prevents FixedDiv signed integer overflows that can lead to division
|
// This prevents FixedDiv signed integer overflows that can lead to division
|
||||||
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
||||||
if (!src || src_width <= 0 || src_height == 0 ||
|
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
|
||||||
src_width > 32768 || src_height < -32768 || src_height > 32768 ||
|
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
|
||||||
!dst || dst_width <= 0 || dst_height <= 0) {
|
dst_height <= 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Simplify filtering when possible.
|
// Simplify filtering when possible.
|
||||||
@ -2059,9 +1967,9 @@ int ScalePlane_16(const uint16_t* src,
|
|||||||
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
||||||
// This prevents FixedDiv signed integer overflows that can lead to division
|
// This prevents FixedDiv signed integer overflows that can lead to division
|
||||||
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
||||||
if (!src || src_width <= 0 || src_height == 0 ||
|
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
|
||||||
src_width > 32768 || src_height < -32768 || src_height > 32768 ||
|
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
|
||||||
!dst || dst_width <= 0 || dst_height <= 0) {
|
dst_height <= 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Simplify filtering when possible.
|
// Simplify filtering when possible.
|
||||||
@ -2171,9 +2079,9 @@ int ScalePlane_12(const uint16_t* src,
|
|||||||
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
|
||||||
// This prevents FixedDiv signed integer overflows that can lead to division
|
// This prevents FixedDiv signed integer overflows that can lead to division
|
||||||
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
|
||||||
if (!src || src_width <= 0 || src_height == 0 ||
|
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
|
||||||
src_width > 32768 || src_height < -32768 || src_height > 32768 ||
|
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
|
||||||
!dst || dst_width <= 0 || dst_height <= 0) {
|
dst_height <= 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// Simplify filtering when possible.
|
// Simplify filtering when possible.
|
||||||
|
|||||||
@ -34,18 +34,10 @@ static __inline int Abs(int v) {
|
|||||||
// ScaleARGB ARGB, 1/2
|
// ScaleARGB ARGB, 1/2
|
||||||
// This is an optimized version for scaling down a ARGB to 1/2 of
|
// This is an optimized version for scaling down a ARGB to 1/2 of
|
||||||
// its original size.
|
// its original size.
|
||||||
static void ScaleARGBDown2(int src_width,
|
static void ScaleARGBDown2(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
ptrdiff_t row_stride = src_stride * (dy >> 16);
|
ptrdiff_t row_stride = src_stride * (dy >> 16);
|
||||||
@ -148,18 +140,10 @@ static void ScaleARGBDown2(int src_width,
|
|||||||
// ScaleARGB ARGB, 1/4
|
// ScaleARGB ARGB, 1/4
|
||||||
// This is an optimized version for scaling down a ARGB to 1/4 of
|
// This is an optimized version for scaling down a ARGB to 1/4 of
|
||||||
// its original size.
|
// its original size.
|
||||||
static int ScaleARGBDown4Box(int src_width,
|
static int ScaleARGBDown4Box(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y, int dy) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy) {
|
|
||||||
int j;
|
int j;
|
||||||
// Allocate 2 rows of ARGB.
|
// Allocate 2 rows of ARGB.
|
||||||
const int row_size = (dst_width * 2 * 4 + 31) & ~31;
|
const int row_size = (dst_width * 2 * 4 + 31) & ~31;
|
||||||
@ -222,18 +206,10 @@ static int ScaleARGBDown4Box(int src_width,
|
|||||||
// ScaleARGB ARGB Even
|
// ScaleARGB ARGB Even
|
||||||
// This is an optimized version for scaling down a ARGB to even
|
// This is an optimized version for scaling down a ARGB to even
|
||||||
// multiple of its original size.
|
// multiple of its original size.
|
||||||
static void ScaleARGBDownEven(int src_width,
|
static void ScaleARGBDownEven(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
int col_step = dx >> 16;
|
int col_step = dx >> 16;
|
||||||
@ -298,19 +274,11 @@ static void ScaleARGBDownEven(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scale ARGB down with bilinear interpolation.
|
// Scale ARGB down with bilinear interpolation.
|
||||||
static int ScaleARGBBilinearDown(int src_width,
|
static int ScaleARGBBilinearDown(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y,
|
||||||
ptrdiff_t src_stride,
|
int dy, enum FilterMode filtering) {
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
|
||||||
int j;
|
int j;
|
||||||
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||||
ptrdiff_t src_stride, int dst_width,
|
ptrdiff_t src_stride, int dst_width,
|
||||||
@ -425,18 +393,10 @@ static int ScaleARGBBilinearDown(int src_width,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scale ARGB up with bilinear interpolation.
|
// Scale ARGB up with bilinear interpolation.
|
||||||
static int ScaleARGBBilinearUp(int src_width,
|
static int ScaleARGBBilinearUp(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||||
@ -604,18 +564,10 @@ static int ScaleARGBBilinearUp(int src_width,
|
|||||||
// of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
// the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
|
|
||||||
static void ScaleARGBSimple(int src_width,
|
static void ScaleARGBSimple(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_argb,
|
||||||
int dst_height,
|
uint8_t* dst_argb, int x, int dx, int y, int dy) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_argb,
|
|
||||||
uint8_t* dst_argb,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy) {
|
|
||||||
int j;
|
int j;
|
||||||
void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
void (*ScaleARGBCols)(uint8_t* dst_argb, const uint8_t* src_argb,
|
||||||
int dst_width, int x, int dx) =
|
int dst_width, int x, int dx) =
|
||||||
|
|||||||
@ -56,18 +56,10 @@ static __inline int Abs(int v) {
|
|||||||
// This is an optimized version for scaling down a UV to 1/2 of
|
// This is an optimized version for scaling down a UV to 1/2 of
|
||||||
// its original size.
|
// its original size.
|
||||||
#if HAS_SCALEUVDOWN2
|
#if HAS_SCALEUVDOWN2
|
||||||
static void ScaleUVDown2(int src_width,
|
static void ScaleUVDown2(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
ptrdiff_t row_stride = src_stride * (dy >> 16);
|
ptrdiff_t row_stride = src_stride * (dy >> 16);
|
||||||
@ -171,18 +163,10 @@ static void ScaleUVDown2(int src_width,
|
|||||||
// This is an optimized version for scaling down a UV to 1/4 of
|
// This is an optimized version for scaling down a UV to 1/4 of
|
||||||
// its original size.
|
// its original size.
|
||||||
#if HAS_SCALEUVDOWN4BOX
|
#if HAS_SCALEUVDOWN4BOX
|
||||||
static int ScaleUVDown4Box(int src_width,
|
static int ScaleUVDown4Box(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy) {
|
|
||||||
int j;
|
int j;
|
||||||
// Allocate 2 rows of UV.
|
// Allocate 2 rows of UV.
|
||||||
const int row_size = (dst_width * 2 * 2 + 15) & ~15;
|
const int row_size = (dst_width * 2 * 2 + 15) & ~15;
|
||||||
@ -253,18 +237,10 @@ static int ScaleUVDown4Box(int src_width,
|
|||||||
// This is an optimized version for scaling down a UV to even
|
// This is an optimized version for scaling down a UV to even
|
||||||
// multiple of its original size.
|
// multiple of its original size.
|
||||||
#if HAS_SCALEUVDOWNEVEN
|
#if HAS_SCALEUVDOWNEVEN
|
||||||
static void ScaleUVDownEven(int src_width,
|
static void ScaleUVDownEven(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
int col_step = dx >> 16;
|
int col_step = dx >> 16;
|
||||||
@ -331,18 +307,10 @@ static void ScaleUVDownEven(int src_width,
|
|||||||
|
|
||||||
// Scale UV down with bilinear interpolation.
|
// Scale UV down with bilinear interpolation.
|
||||||
#if HAS_SCALEUVBILINEARDOWN
|
#if HAS_SCALEUVBILINEARDOWN
|
||||||
static int ScaleUVBilinearDown(int src_width,
|
static int ScaleUVBilinearDown(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||||
@ -445,18 +413,10 @@ static int ScaleUVBilinearDown(int src_width,
|
|||||||
|
|
||||||
// Scale UV up with bilinear interpolation.
|
// Scale UV up with bilinear interpolation.
|
||||||
#if HAS_SCALEUVBILINEARUP
|
#if HAS_SCALEUVBILINEARUP
|
||||||
static int ScaleUVBilinearUp(int src_width,
|
static int ScaleUVBilinearUp(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy,
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy,
|
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int j;
|
int j;
|
||||||
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
|
||||||
@ -603,13 +563,9 @@ static int ScaleUVBilinearUp(int src_width,
|
|||||||
// This is an optimized version for scaling up a plane to 2 times of
|
// This is an optimized version for scaling up a plane to 2 times of
|
||||||
// its original width, using linear interpolation.
|
// its original width, using linear interpolation.
|
||||||
// This is used to scale U and V planes of NV16 to NV24.
|
// This is used to scale U and V planes of NV16 to NV24.
|
||||||
static void ScaleUVLinearUp2(int src_width,
|
static void ScaleUVLinearUp2(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv) {
|
uint8_t* dst_uv) {
|
||||||
void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
|
void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
|
||||||
ScaleUVRowUp2_Linear_Any_C;
|
ScaleUVRowUp2_Linear_Any_C;
|
||||||
@ -723,13 +679,9 @@ static void ScaleUVBilinearUp2(int src_width,
|
|||||||
// This is an optimized version for scaling up a plane to 2 times of
|
// This is an optimized version for scaling up a plane to 2 times of
|
||||||
// its original width, using linear interpolation.
|
// its original width, using linear interpolation.
|
||||||
// This is used to scale U and V planes of P210 to P410.
|
// This is used to scale U and V planes of P210 to P410.
|
||||||
static void ScaleUVLinearUp2_16(int src_width,
|
static void ScaleUVLinearUp2_16(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint16_t* src_uv,
|
||||||
int dst_height,
|
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint16_t* src_uv,
|
|
||||||
uint16_t* dst_uv) {
|
uint16_t* dst_uv) {
|
||||||
void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
|
void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
|
||||||
ScaleUVRowUp2_Linear_16_Any_C;
|
ScaleUVRowUp2_Linear_16_Any_C;
|
||||||
@ -831,18 +783,10 @@ static void ScaleUVBilinearUp2_16(int src_width,
|
|||||||
// of x and dx is the integer part of the source position and
|
// of x and dx is the integer part of the source position and
|
||||||
// the lower 16 bits are the fixed decimal part.
|
// the lower 16 bits are the fixed decimal part.
|
||||||
|
|
||||||
static void ScaleUVSimple(int src_width,
|
static void ScaleUVSimple(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, ptrdiff_t src_stride,
|
||||||
int dst_width,
|
ptrdiff_t dst_stride, const uint8_t* src_uv,
|
||||||
int dst_height,
|
uint8_t* dst_uv, int x, int dx, int y, int dy) {
|
||||||
ptrdiff_t src_stride,
|
|
||||||
ptrdiff_t dst_stride,
|
|
||||||
const uint8_t* src_uv,
|
|
||||||
uint8_t* dst_uv,
|
|
||||||
int x,
|
|
||||||
int dx,
|
|
||||||
int y,
|
|
||||||
int dy) {
|
|
||||||
int j;
|
int j;
|
||||||
void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
|
void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
|
||||||
int x, int dx) =
|
int x, int dx) =
|
||||||
|
|||||||
@ -464,8 +464,7 @@ static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
|
|||||||
static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
|
static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
|
||||||
double y1 = (y - 16) * 1.164384;
|
double y1 = (y - 16) * 1.164384;
|
||||||
*r = RoundToByte(y1 - (v - 128) * -1.67867);
|
*r = RoundToByte(y1 - (v - 128) * -1.67867);
|
||||||
*g = RoundToByte(y1 - (u - 128) * 0.187326 -
|
*g = RoundToByte(y1 - (u - 128) * 0.187326 - (v - 128) * 0.65042);
|
||||||
(v - 128) * 0.65042);
|
|
||||||
*b = RoundToByte(y1 - (u - 128) * -2.14177);
|
*b = RoundToByte(y1 - (u - 128) * -2.14177);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -82,15 +82,19 @@ namespace libyuv {
|
|||||||
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
||||||
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
||||||
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
|
align_buffer_page_end(src_y, \
|
||||||
|
kPaddedWidth * kPaddedHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end( \
|
align_buffer_page_end( \
|
||||||
src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
|
src_uv, \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC * 2 + OFF); \
|
||||||
align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_u_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_v_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_u_opt, \
|
||||||
|
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
|
align_buffer_page_end(dst_v_opt, \
|
||||||
|
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
||||||
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
|
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
|
||||||
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
|
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
|
||||||
@ -101,12 +105,12 @@ namespace libyuv {
|
|||||||
src_uv_p[i] = \
|
src_uv_p[i] = \
|
||||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_u_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_v_c, 3, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_u_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_v_opt, 103, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||||
src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
|
src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
|
||||||
@ -223,11 +227,11 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
|
|||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
src_y[i + OFF] = (fastrand() & 0xff); \
|
src_y[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
@ -381,58 +385,58 @@ TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
|
|||||||
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
|
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
|
||||||
W1280, N, NEG, OFF) \
|
W1280, N, NEG, OFF) \
|
||||||
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
|
||||||
const int kWidth = W1280; \
|
const int kWidth = W1280; \
|
||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStrideB = kWidth * BPP_B; \
|
const int kStrideB = kWidth * BPP_B; \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_uv, \
|
align_buffer_page_end( \
|
||||||
kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
|
src_uv, kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kWidth; ++j) \
|
for (int j = 0; j < kWidth; ++j) \
|
||||||
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
|
||||||
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
|
||||||
for (int j = 0; j < kStrideUV * 2; ++j) { \
|
for (int j = 0; j < kStrideUV * 2; ++j) { \
|
||||||
src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
|
src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 1, kStrideB* kHeight); \
|
memset(dst_argb_c, 1, kStrideB * kHeight); \
|
||||||
memset(dst_argb_opt, 101, kStrideB* kHeight); \
|
memset(dst_argb_opt, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
|
||||||
dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \
|
dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
|
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
|
||||||
dst_argb_opt, kWidth * BPP_B, kWidth, \
|
dst_argb_opt, kWidth * BPP_B, kWidth, \
|
||||||
NEG kHeight); \
|
NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
|
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
|
||||||
align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \
|
align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \
|
||||||
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
|
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
|
||||||
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
|
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
|
||||||
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
|
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
|
||||||
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
|
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
|
||||||
kHeight); \
|
kHeight); \
|
||||||
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
|
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
|
||||||
kHeight); \
|
kHeight); \
|
||||||
for (int i = 0; i < kHeight; ++i) { \
|
for (int i = 0; i < kHeight; ++i) { \
|
||||||
for (int j = 0; j < kWidth * 4; ++j) { \
|
for (int j = 0; j < kWidth * 4; ++j) { \
|
||||||
ASSERT_EQ(dst_argb32_c[i * kWidth * 4 + j], \
|
ASSERT_EQ(dst_argb32_c[i * kWidth * 4 + j], \
|
||||||
dst_argb32_opt[i * kWidth * 4 + j]); \
|
dst_argb32_opt[i * kWidth * 4 + j]); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
free_aligned_buffer_page_end(src_y); \
|
free_aligned_buffer_page_end(src_y); \
|
||||||
free_aligned_buffer_page_end(src_uv); \
|
free_aligned_buffer_page_end(src_uv); \
|
||||||
free_aligned_buffer_page_end(dst_argb_c); \
|
free_aligned_buffer_page_end(dst_argb_c); \
|
||||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||||
free_aligned_buffer_page_end(dst_argb32_c); \
|
free_aligned_buffer_page_end(dst_argb32_c); \
|
||||||
free_aligned_buffer_page_end(dst_argb32_opt); \
|
free_aligned_buffer_page_end(dst_argb32_opt); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(ENABLE_FULL_TESTS)
|
#if defined(ENABLE_FULL_TESTS)
|
||||||
@ -507,15 +511,16 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
|
|||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb, \
|
align_buffer_page_end(src_argb, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
|
align_buffer_page_end(dst_argb_c, \
|
||||||
|
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
|
||||||
align_buffer_page_end(dst_argb_opt, \
|
align_buffer_page_end(dst_argb_opt, \
|
||||||
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
|
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
||||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 1, kStrideB* kHeightB); \
|
memset(dst_argb_c, 1, kStrideB * kHeightB); \
|
||||||
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
|
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
|
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
|
||||||
kStrideB, kWidth, NEG kHeight); \
|
kStrideB, kWidth, NEG kHeight); \
|
||||||
@ -532,41 +537,42 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
|
|||||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
|
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
|
||||||
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
|
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
|
||||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
|
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
|
||||||
for (int times = 0; times < benchmark_iterations_; ++times) { \
|
for (int times = 0; times < benchmark_iterations_; ++times) { \
|
||||||
const int kWidth = (fastrand() & 63) + 1; \
|
const int kWidth = (fastrand() & 63) + 1; \
|
||||||
const int kHeight = (fastrand() & 31) + 1; \
|
const int kHeight = (fastrand() & 31) + 1; \
|
||||||
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
|
||||||
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
|
||||||
const int kStrideA = \
|
const int kStrideA = \
|
||||||
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
|
align_buffer_page_end(src_argb, \
|
||||||
align_buffer_page_end(dst_argb_c, \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
|
||||||
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
|
align_buffer_page_end(dst_argb_c, \
|
||||||
align_buffer_page_end(dst_argb_opt, \
|
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
|
||||||
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
|
align_buffer_page_end(dst_argb_opt, \
|
||||||
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
|
||||||
src_argb[i] = 0xfe; \
|
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
||||||
} \
|
src_argb[i] = 0xfe; \
|
||||||
memset(dst_argb_c, 123, kStrideB* kHeightB); \
|
} \
|
||||||
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
|
memset(dst_argb_c, 123, kStrideB * kHeightB); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
|
||||||
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
kStrideB, kWidth, kHeight); \
|
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
|
||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
kStrideB, kWidth, kHeight); \
|
||||||
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
kStrideB, kWidth, kHeight); \
|
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
|
||||||
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
|
kStrideB, kWidth, kHeight); \
|
||||||
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
|
||||||
} \
|
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
|
||||||
free_aligned_buffer_page_end(src_argb); \
|
} \
|
||||||
free_aligned_buffer_page_end(dst_argb_c); \
|
free_aligned_buffer_page_end(src_argb); \
|
||||||
free_aligned_buffer_page_end(dst_argb_opt); \
|
free_aligned_buffer_page_end(dst_argb_c); \
|
||||||
} \
|
free_aligned_buffer_page_end(dst_argb_opt); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(ENABLE_FULL_TESTS)
|
#if defined(ENABLE_FULL_TESTS)
|
||||||
@ -672,11 +678,11 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
|||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb, \
|
align_buffer_page_end(src_argb, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, \
|
align_buffer_page_end(dst_argb_c, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, \
|
align_buffer_page_end(dst_argb_opt, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
||||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
@ -791,14 +797,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
|||||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
|
align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 1, kStrideB* kHeightB); \
|
memset(dst_argb_c, 1, kStrideB * kHeightB); \
|
||||||
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
|
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
|
FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
|
||||||
NULL, kWidth, NEG kHeight); \
|
NULL, kWidth, NEG kHeight); \
|
||||||
@ -827,14 +833,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
|
|||||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb, kStrideA* kHeightA); \
|
align_buffer_page_end(src_argb, kStrideA * kHeightA); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
|
||||||
src_argb[i] = (fastrand() & 0xff); \
|
src_argb[i] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 123, kStrideB* kHeightB); \
|
memset(dst_argb_c, 123, kStrideB * kHeightB); \
|
||||||
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
|
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \
|
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \
|
||||||
kWidth, kHeight); \
|
kWidth, kHeight); \
|
||||||
@ -885,15 +891,16 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
|
|||||||
const int kStrideA = \
|
const int kStrideA = \
|
||||||
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||||
align_buffer_page_end(src_argb, \
|
align_buffer_page_end(src_argb, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
|
align_buffer_page_end(dst_argb_c, \
|
||||||
|
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
|
||||||
align_buffer_page_end(dst_argb_opt, \
|
align_buffer_page_end(dst_argb_opt, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
||||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
memset(dst_argb_c, 1, kStrideA* kHeightA); \
|
memset(dst_argb_c, 1, kStrideA * kHeightA); \
|
||||||
memset(dst_argb_opt, 101, kStrideA* kHeightA); \
|
memset(dst_argb_opt, 101, kStrideA * kHeightA); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
|
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
|
||||||
kStrideA, kWidth, NEG kHeight); \
|
kStrideA, kWidth, NEG kHeight); \
|
||||||
@ -945,12 +952,12 @@ TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
|
|||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_a, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
src_y[i + OFF] = (fastrand() & 0xff); \
|
src_y[i + OFF] = (fastrand() & 0xff); \
|
||||||
src_a[i + OFF] = (fastrand() & 0xff); \
|
src_a[i + OFF] = (fastrand() & 0xff); \
|
||||||
@ -1240,11 +1247,11 @@ TEST_F(LibYUVConvertTest, TestDither) {
|
|||||||
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
src_y[i + OFF] = (fastrand() & 0xff); \
|
src_y[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
@ -1265,10 +1272,10 @@ TEST_F(LibYUVConvertTest, TestDither) {
|
|||||||
dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \
|
dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \
|
||||||
} \
|
} \
|
||||||
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
|
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
|
||||||
align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \
|
align_buffer_page_end(dst_argb32_c, kWidth * BPP_C * kHeight); \
|
||||||
align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \
|
align_buffer_page_end(dst_argb32_opt, kWidth * BPP_C * kHeight); \
|
||||||
memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \
|
memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \
|
||||||
memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \
|
memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \
|
||||||
FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \
|
FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \
|
||||||
kWidth, kHeight); \
|
kWidth, kHeight); \
|
||||||
FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \
|
FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \
|
||||||
@ -1317,10 +1324,10 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
|
|||||||
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
src_y[i + OFF] = (fastrand() & 0xff); \
|
src_y[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
@ -1334,8 +1341,8 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
|
|||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
|
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
|
||||||
const int kStrideC = kWidth * BPP_C; \
|
const int kStrideC = kWidth * BPP_C; \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
|
||||||
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
||||||
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
@ -1464,14 +1471,14 @@ TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
|
|||||||
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
||||||
const int kSizeUV = \
|
const int kSizeUV = \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
align_buffer_page_end(src_u, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
align_buffer_page_end(src_v, kSizeUV + OFF); \
|
||||||
align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_a, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
|
||||||
const int kStrideC = kWidth * BPP_C; \
|
const int kStrideC = kWidth * BPP_C; \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
|
||||||
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
||||||
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
||||||
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
||||||
@ -1578,16 +1585,16 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
|
|||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
|
const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
|
||||||
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
|
||||||
align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \
|
align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
|
||||||
MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \
|
MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \
|
||||||
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
|
||||||
FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \
|
FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \
|
||||||
kWidth, NEG kHeight); \
|
kWidth, NEG kHeight); \
|
||||||
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
|
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
|
||||||
const int kStrideC = kWidth * BPP_C; \
|
const int kStrideC = kWidth * BPP_C; \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
|
||||||
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
|
||||||
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
@ -1798,11 +1805,11 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
|
|||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
const int kBpc = 2; \
|
const int kBpc = 2; \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
|
align_buffer_page_end(src_u, kSizeUV * kBpc + SOFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
|
align_buffer_page_end(src_v, kSizeUV * kBpc + SOFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
|
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
|
||||||
} \
|
} \
|
||||||
@ -1913,12 +1920,12 @@ TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
|
|||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
const int kBpc = 2; \
|
const int kBpc = 2; \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + OFF); \
|
||||||
align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
|
align_buffer_page_end(src_u, kSizeUV * kBpc + OFF); \
|
||||||
align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
|
align_buffer_page_end(src_v, kSizeUV * kBpc + OFF); \
|
||||||
align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \
|
align_buffer_page_end(src_a, kWidth * kHeight * kBpc + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
|
reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
|
||||||
(fastrand() & ((1 << S_DEPTH) - 1)); \
|
(fastrand() & ((1 << S_DEPTH) - 1)); \
|
||||||
@ -2146,10 +2153,10 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
|
|||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
|
||||||
const int kBpc = 2; \
|
const int kBpc = 2; \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \
|
||||||
align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \
|
align_buffer_page_end(src_uv, kSizeUV * kBpc + SOFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \
|
||||||
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
for (int i = 0; i < kWidth * kHeight; ++i) { \
|
||||||
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
|
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
|
||||||
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
|
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
|
||||||
@ -2839,8 +2846,12 @@ TEST_F(LibYUVConvertTest, TestARGBToUVMatrixRow_Opt) {
|
|||||||
|
|
||||||
int half_width = (width + 1) / 2;
|
int half_width = (width + 1) / 2;
|
||||||
for (int i = 0; i < half_width; ++i) {
|
for (int i = 0; i < half_width; ++i) {
|
||||||
ASSERT_EQ(dest_u_c[i], dest_u_opt[i]) << "u mismatch at " << i << " width " << width << " height " << height;
|
ASSERT_EQ(dest_u_c[i], dest_u_opt[i])
|
||||||
ASSERT_EQ(dest_v_c[i], dest_v_opt[i]) << "v mismatch at " << i << " width " << width << " height " << height;
|
<< "u mismatch at " << i << " width " << width << " height "
|
||||||
|
<< height;
|
||||||
|
ASSERT_EQ(dest_v_c[i], dest_v_opt[i])
|
||||||
|
<< "v mismatch at " << i << " width " << width << " height "
|
||||||
|
<< height;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2903,13 +2914,12 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
|
|||||||
free_aligned_buffer_page_end(dest_argb);
|
free_aligned_buffer_page_end(dest_argb);
|
||||||
free_aligned_buffer_page_end(orig_i400);
|
free_aligned_buffer_page_end(orig_i400);
|
||||||
}
|
}
|
||||||
#endif // DISABLE_SLOW_TESTS
|
#endif // DISABLE_SLOW_TESTS
|
||||||
#endif // !defined(DISABLE_SLOW_TESTS) && \
|
#endif // !defined(DISABLE_SLOW_TESTS) && \
|
||||||
// (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__))
|
// (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__))
|
||||||
|
|
||||||
#endif // !defined(LEAN_TESTS)
|
#endif // !defined(LEAN_TESTS)
|
||||||
|
|
||||||
|
|
||||||
#define TESTATOBPI(FMT_A, TYPE_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, SUBSAMP_X, \
|
#define TESTATOBPI(FMT_A, TYPE_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, SUBSAMP_X, \
|
||||||
SUBSAMP_Y, W1280, N, NEG, OFF) \
|
SUBSAMP_Y, W1280, N, NEG, OFF) \
|
||||||
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
|
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
|
||||||
@ -2922,17 +2932,17 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
|
|||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
|
||||||
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_argb, \
|
align_buffer_page_end(src_argb, \
|
||||||
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
|
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kStrideY* kHeight); \
|
align_buffer_page_end(dst_y_c, kStrideY * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_c, kSizeUV); \
|
align_buffer_page_end(dst_uv_c, kSizeUV); \
|
||||||
align_buffer_page_end(dst_y_opt, kStrideY* kHeight); \
|
align_buffer_page_end(dst_y_opt, kStrideY * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_opt, kSizeUV); \
|
align_buffer_page_end(dst_uv_opt, kSizeUV); \
|
||||||
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
|
||||||
src_argb[i + OFF] = (fastrand() & 0xff); \
|
src_argb[i + OFF] = (fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kStrideY* kHeight); \
|
memset(dst_y_c, 1, kStrideY * kHeight); \
|
||||||
memset(dst_uv_c, 2, kSizeUV); \
|
memset(dst_uv_c, 2, kSizeUV); \
|
||||||
memset(dst_y_opt, 101, kStrideY* kHeight); \
|
memset(dst_y_opt, 101, kStrideY * kHeight); \
|
||||||
memset(dst_uv_opt, 102, kSizeUV); \
|
memset(dst_uv_opt, 102, kSizeUV); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, dst_y_c, kStrideY, \
|
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, dst_y_c, kStrideY, \
|
||||||
|
|||||||
@ -78,17 +78,19 @@ namespace libyuv {
|
|||||||
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
||||||
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
||||||
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(src_u, \
|
align_buffer_page_end(src_u, \
|
||||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(src_v, \
|
align_buffer_page_end(src_v, \
|
||||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_u_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_v_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
align_buffer_page_end(dst_u_opt, \
|
||||||
align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
|
align_buffer_page_end(dst_v_opt, \
|
||||||
|
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
|
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
|
||||||
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||||
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||||
@ -102,12 +104,12 @@ namespace libyuv {
|
|||||||
src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
|
src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
|
||||||
src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
|
src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_u_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_v_c, 3, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_u_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
|
memset(dst_v_opt, 103, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||||
src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
|
src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \
|
||||||
@ -212,15 +214,15 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
|
|||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kSizeUV = \
|
const int kSizeUV = \
|
||||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_uv, \
|
align_buffer_page_end(src_uv, \
|
||||||
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
|
kSizeUV * ((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
@ -239,12 +241,12 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12)
|
|||||||
(fastrand() & 0xff); \
|
(fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_u_c, 2, \
|
memset(dst_u_c, 2, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_c, 3, \
|
memset(dst_v_c, 3, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_u_opt, 102, \
|
memset(dst_u_opt, 102, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_opt, 103, \
|
memset(dst_v_opt, 103, \
|
||||||
@ -359,17 +361,17 @@ static int I400ToNV21(const uint8_t* src_y,
|
|||||||
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
||||||
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
|
||||||
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(src_u, \
|
align_buffer_page_end(src_u, \
|
||||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(src_v, \
|
align_buffer_page_end(src_v, \
|
||||||
kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
|
kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_uv_c, \
|
align_buffer_page_end(dst_uv_c, \
|
||||||
kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
|
kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_uv_opt, \
|
align_buffer_page_end(dst_uv_opt, \
|
||||||
kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
|
kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \
|
||||||
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
|
MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
|
||||||
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||||
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
|
||||||
@ -383,10 +385,10 @@ static int I400ToNV21(const uint8_t* src_y,
|
|||||||
src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
|
src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \
|
||||||
src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
|
src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
|
memset(dst_uv_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \
|
memset(dst_uv_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
|
SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \
|
||||||
src_v_p, kSrcHalfWidth, \
|
src_v_p, kSrcHalfWidth, \
|
||||||
@ -478,14 +480,15 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
|
|||||||
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
|
||||||
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
|
||||||
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
|
align_buffer_page_end(src_y, \
|
||||||
|
kPaddedWidth * kPaddedHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end( \
|
align_buffer_page_end( \
|
||||||
src_uv, \
|
src_uv, \
|
||||||
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
|
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_uv_c, \
|
align_buffer_page_end(dst_uv_c, \
|
||||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \
|
||||||
align_buffer_page_end(dst_uv_opt, \
|
align_buffer_page_end(dst_uv_opt, \
|
||||||
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
|
||||||
@ -502,13 +505,13 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
|
|||||||
src_uv_p[i] = \
|
src_uv_p[i] = \
|
||||||
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
|
memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \
|
||||||
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||||
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
src_y_p, kWidth * SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
||||||
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
||||||
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
|
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
|
||||||
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
|
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
|
||||||
@ -516,7 +519,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
|
|||||||
MaskCpuFlags(benchmark_cpu_info_); \
|
MaskCpuFlags(benchmark_cpu_info_); \
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||||
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
SRC_FMT_PLANAR##To##FMT_PLANAR( \
|
||||||
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
src_y_p, kWidth * SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
|
||||||
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
|
||||||
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
|
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
|
||||||
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
|
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
|
||||||
@ -598,16 +601,16 @@ TESTBPTOBP(P010, uint16_t, 2, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
|
|||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
||||||
align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
|
align_buffer_page_end(src_argb, kStride * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_c, \
|
align_buffer_page_end(dst_uv_c, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_opt, \
|
align_buffer_page_end(dst_uv_opt, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kStride; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
@ -691,20 +694,20 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
|
|||||||
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
|
||||||
align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
|
align_buffer_page_end(src_argb, kStride * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_a_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_a_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_c, \
|
align_buffer_page_end(dst_uv_c, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_a_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_a_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_opt, \
|
align_buffer_page_end(dst_uv_opt, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_a_c, 1, kWidth* kHeight); \
|
memset(dst_a_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_y_c, 2, kWidth* kHeight); \
|
memset(dst_y_c, 2, kWidth * kHeight); \
|
||||||
memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_a_opt, 101, kWidth* kHeight); \
|
memset(dst_a_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_y_opt, 102, kWidth* kHeight); \
|
memset(dst_y_opt, 102, kWidth * kHeight); \
|
||||||
memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kStride; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
@ -765,19 +768,19 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2)
|
|||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
|
const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
|
||||||
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
|
||||||
align_buffer_page_end(src_argb, kStride* kHeight + OFF); \
|
align_buffer_page_end(src_argb, kStride * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_c, \
|
align_buffer_page_end(dst_uv_c, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_opt, \
|
align_buffer_page_end(dst_uv_opt, \
|
||||||
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
for (int i = 0; i < kHeight; ++i) \
|
for (int i = 0; i < kHeight; ++i) \
|
||||||
for (int j = 0; j < kStride; ++j) \
|
for (int j = 0; j < kStride; ++j) \
|
||||||
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
|
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, dst_uv_c, \
|
||||||
@ -1950,17 +1953,17 @@ TEST_F(LibYUVConvertTest, I420CropOddY) {
|
|||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
\
|
\
|
||||||
align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
|
align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \
|
||||||
align_buffer_page_end(orig_y, kWidth* kHeight); \
|
align_buffer_page_end(orig_y, kWidth * kHeight); \
|
||||||
align_buffer_page_end(orig_u, \
|
align_buffer_page_end(orig_u, \
|
||||||
SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
||||||
align_buffer_page_end(orig_v, \
|
align_buffer_page_end(orig_v, \
|
||||||
SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
||||||
\
|
\
|
||||||
align_buffer_page_end(dst_y_orig, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_orig, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv_orig, \
|
align_buffer_page_end(dst_uv_orig, \
|
||||||
2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
||||||
\
|
\
|
||||||
align_buffer_page_end(dst_y, kWidth* kHeight); \
|
align_buffer_page_end(dst_y, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_uv, \
|
align_buffer_page_end(dst_uv, \
|
||||||
2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \
|
||||||
\
|
\
|
||||||
@ -2423,6 +2426,129 @@ TEST_F(LibYUVConvertTest, TestARGBToI444Matrix) {
|
|||||||
free_aligned_buffer_page_end(ref_v);
|
free_aligned_buffer_page_end(ref_v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename ConvertToYUV, typename ConvertToARGB>
|
||||||
|
static void TestRGBToI420(ConvertToYUV convert_to_yuv,
|
||||||
|
ConvertToARGB convert_to_argb, int width, int height,
|
||||||
|
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||||
|
align_buffer_page_end(src_rgb, width * height * 4);
|
||||||
|
align_buffer_page_end(dst_y, width * height);
|
||||||
|
align_buffer_page_end(dst_u, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
align_buffer_page_end(dst_v, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
|
||||||
|
align_buffer_page_end(tmp_argb, width * height * 4);
|
||||||
|
align_buffer_page_end(ref_y, width * height);
|
||||||
|
align_buffer_page_end(ref_u, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
align_buffer_page_end(ref_v, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
|
||||||
|
MemRandomize(src_rgb, width * height * 4);
|
||||||
|
|
||||||
|
{
|
||||||
|
SCOPED_TRACE("C_Version");
|
||||||
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
|
|
||||||
|
// Clear buffers
|
||||||
|
memset(dst_y, 0, width * height);
|
||||||
|
memset(dst_u, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(dst_v, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(ref_y, 0, width * height);
|
||||||
|
memset(ref_u, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(ref_v, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(tmp_argb, 0, width * height * 4);
|
||||||
|
|
||||||
|
int r1 =
|
||||||
|
convert_to_yuv(src_rgb, width * 4, dst_y, width, dst_u, (width + 1) / 2,
|
||||||
|
dst_v, (width + 1) / 2, width, height);
|
||||||
|
ASSERT_EQ(r1, 0);
|
||||||
|
|
||||||
|
int r2 =
|
||||||
|
convert_to_argb(src_rgb, width * 4, tmp_argb, width * 4, width, height);
|
||||||
|
ASSERT_EQ(r2, 0);
|
||||||
|
|
||||||
|
int r3 = ARGBToI420(tmp_argb, width * 4, ref_y, width, ref_u,
|
||||||
|
(width + 1) / 2, ref_v, (width + 1) / 2, width, height);
|
||||||
|
ASSERT_EQ(r3, 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < width * height; ++i) {
|
||||||
|
ASSERT_EQ(dst_y[i], ref_y[i]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < (width + 1) / 2 * (height + 1) / 2; ++i) {
|
||||||
|
ASSERT_EQ(dst_u[i], ref_u[i]);
|
||||||
|
ASSERT_EQ(dst_v[i], ref_v[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
SCOPED_TRACE("SIMD_Version");
|
||||||
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
|
|
||||||
|
// Clear buffers
|
||||||
|
memset(dst_y, 0, width * height);
|
||||||
|
memset(dst_u, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(dst_v, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(ref_y, 0, width * height);
|
||||||
|
memset(ref_u, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(ref_v, 0, (width + 1) / 2 * (height + 1) / 2);
|
||||||
|
memset(tmp_argb, 0, width * height * 4);
|
||||||
|
|
||||||
|
int r1 =
|
||||||
|
convert_to_yuv(src_rgb, width * 4, dst_y, width, dst_u, (width + 1) / 2,
|
||||||
|
dst_v, (width + 1) / 2, width, height);
|
||||||
|
ASSERT_EQ(r1, 0);
|
||||||
|
|
||||||
|
int r2 =
|
||||||
|
convert_to_argb(src_rgb, width * 4, tmp_argb, width * 4, width, height);
|
||||||
|
ASSERT_EQ(r2, 0);
|
||||||
|
|
||||||
|
int r3 = ARGBToI420(tmp_argb, width * 4, ref_y, width, ref_u,
|
||||||
|
(width + 1) / 2, ref_v, (width + 1) / 2, width, height);
|
||||||
|
ASSERT_EQ(r3, 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < width * height; ++i) {
|
||||||
|
ASSERT_EQ(dst_y[i], ref_y[i]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < (width + 1) / 2 * (height + 1) / 2; ++i) {
|
||||||
|
ASSERT_EQ(dst_u[i], ref_u[i]);
|
||||||
|
ASSERT_EQ(dst_v[i], ref_v[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free_aligned_buffer_page_end(src_rgb);
|
||||||
|
free_aligned_buffer_page_end(dst_y);
|
||||||
|
free_aligned_buffer_page_end(dst_u);
|
||||||
|
free_aligned_buffer_page_end(dst_v);
|
||||||
|
free_aligned_buffer_page_end(tmp_argb);
|
||||||
|
free_aligned_buffer_page_end(ref_y);
|
||||||
|
free_aligned_buffer_page_end(ref_u);
|
||||||
|
free_aligned_buffer_page_end(ref_v);
|
||||||
|
}
|
||||||
|
|
||||||
|
// BGRAToI420 must match BGRAToARGB followed by ARGBToI420 for an even square,
// an odd square and a 1280x720 frame.
TEST_F(LibYUVConvertTest, BGRAToI420_Check) {
  static const int kDimensions[3][2] = {{16, 16}, {17, 17}, {1280, 720}};
  for (int n = 0; n < 3; ++n) {
    const int w = kDimensions[n][0];
    const int h = kDimensions[n][1];
    TestRGBToI420(BGRAToI420, BGRAToARGB, w, h, disable_cpu_flags_,
                  benchmark_cpu_info_);
  }
}
|
||||||
|
|
||||||
|
// RGBAToI420 must match RGBAToARGB followed by ARGBToI420 for an even square,
// an odd square and a 1280x720 frame.
TEST_F(LibYUVConvertTest, RGBAToI420_Check) {
  static const int kDimensions[3][2] = {{16, 16}, {17, 17}, {1280, 720}};
  for (int n = 0; n < 3; ++n) {
    const int w = kDimensions[n][0];
    const int h = kDimensions[n][1];
    TestRGBToI420(RGBAToI420, RGBAToARGB, w, h, disable_cpu_flags_,
                  benchmark_cpu_info_);
  }
}
|
||||||
|
|
||||||
|
// ABGRToI420 must match ABGRToARGB followed by ARGBToI420 for an even square,
// an odd square and a 1280x720 frame.
TEST_F(LibYUVConvertTest, ABGRToI420_Check) {
  static const int kDimensions[3][2] = {{16, 16}, {17, 17}, {1280, 720}};
  for (int n = 0; n < 3; ++n) {
    const int w = kDimensions[n][0];
    const int h = kDimensions[n][1];
    TestRGBToI420(ABGRToI420, ABGRToARGB, w, h, disable_cpu_flags_,
                  benchmark_cpu_info_);
  }
}
|
||||||
|
|
||||||
#endif // !defined(LEAN_TESTS)
|
#endif // !defined(LEAN_TESTS)
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -1212,10 +1212,10 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
|
|||||||
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
|
||||||
const int kStrideB = \
|
const int kStrideB = \
|
||||||
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
|
||||||
align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \
|
align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_argb_b, kStrideA* kHeight + OFF); \
|
align_buffer_page_end(src_argb_b, kStrideA * kHeight + OFF); \
|
||||||
align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \
|
align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
|
||||||
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \
|
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
|
||||||
for (int i = 0; i < kStrideA * kHeight; ++i) { \
|
for (int i = 0; i < kStrideA * kHeight; ++i) { \
|
||||||
src_argb_a[i + OFF] = (fastrand() & 0xff); \
|
src_argb_a[i + OFF] = (fastrand() & 0xff); \
|
||||||
src_argb_b[i + OFF] = (fastrand() & 0xff); \
|
src_argb_b[i + OFF] = (fastrand() & 0xff); \
|
||||||
|
|||||||
@ -495,15 +495,15 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
|
|||||||
const int kHeight = benchmark_height_; \
|
const int kHeight = benchmark_height_; \
|
||||||
const int kSizeUV = \
|
const int kSizeUV = \
|
||||||
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
|
||||||
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
|
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
|
||||||
align_buffer_page_end(src_uv, \
|
align_buffer_page_end(src_uv, \
|
||||||
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
|
kSizeUV * ((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
|
||||||
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
|
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
|
||||||
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
|
||||||
@ -522,12 +522,12 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
|
|||||||
(fastrand() & 0xff); \
|
(fastrand() & 0xff); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
memset(dst_y_c, 1, kWidth* kHeight); \
|
memset(dst_y_c, 1, kWidth * kHeight); \
|
||||||
memset(dst_u_c, 2, \
|
memset(dst_u_c, 2, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_c, 3, \
|
memset(dst_v_c, 3, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_y_opt, 101, kWidth* kHeight); \
|
memset(dst_y_opt, 101, kWidth * kHeight); \
|
||||||
memset(dst_u_opt, 102, \
|
memset(dst_u_opt, 102, \
|
||||||
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
|
||||||
memset(dst_v_opt, 103, \
|
memset(dst_v_opt, 103, \
|
||||||
|
|||||||
@ -430,14 +430,10 @@ static void FillRamp(uint8_t* buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||||
static void YUVToARGBTestFilter(int src_width,
|
static void YUVToARGBTestFilter(int src_width, int src_height, int dst_width,
|
||||||
int src_height,
|
int dst_height, FilterMode f,
|
||||||
int dst_width,
|
int benchmark_iterations, int error_threshold,
|
||||||
int dst_height,
|
int* max_diff_out) {
|
||||||
FilterMode f,
|
|
||||||
int benchmark_iterations,
|
|
||||||
int error_threshold,
|
|
||||||
int* max_diff_out) {
|
|
||||||
int64_t src_y_plane_size = Abs(src_width) * Abs(src_height);
|
int64_t src_y_plane_size = Abs(src_width) * Abs(src_height);
|
||||||
int64_t src_uv_plane_size =
|
int64_t src_uv_plane_size =
|
||||||
((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
|
((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
|
||||||
@ -516,10 +512,10 @@ TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
|
|||||||
|
|
||||||
TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
|
TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
|
||||||
int diff = 0;
|
int diff = 0;
|
||||||
YUVToARGBTestFilter(
|
YUVToARGBTestFilter(benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
|
||||||
benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_,
|
benchmark_width_, benchmark_height_,
|
||||||
benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_, 10,
|
libyuv::kFilterBilinear, benchmark_iterations_, 10,
|
||||||
&diff);
|
&diff);
|
||||||
ASSERT_LE(diff, 10);
|
ASSERT_LE(diff, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user