mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-04-30 19:09:18 +08:00
clang-format libyuv
BUG=libyuv:654 R=kjellander@chromium.org Review URL: https://codereview.chromium.org/2469353005 .
This commit is contained in:
parent
f2c27dafa2
commit
e62309f259
6
.clang-format
Normal file
6
.clang-format
Normal file
@ -0,0 +1,6 @@
|
||||
# Defines the Chromium style for automatic reformatting.
|
||||
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
|
||||
BasedOnStyle: Chromium
|
||||
---
|
||||
Language: Java
|
||||
BasedOnStyle: Google
|
||||
@ -26,31 +26,31 @@
|
||||
typedef unsigned __int64 uint64;
|
||||
typedef __int64 int64;
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## I64
|
||||
#define INT64_C(x) x##I64
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## UI64
|
||||
#define UINT64_C(x) x##UI64
|
||||
#endif
|
||||
#define INT64_F "I64"
|
||||
#else // COMPILER_MSVC
|
||||
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long uint64; // NOLINT
|
||||
typedef long int64; // NOLINT
|
||||
typedef long int64; // NOLINT
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## L
|
||||
#define INT64_C(x) x##L
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## UL
|
||||
#define UINT64_C(x) x##UL
|
||||
#endif
|
||||
#define INT64_F "l"
|
||||
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long long uint64; // NOLINT
|
||||
typedef long long int64; // NOLINT
|
||||
typedef long long int64; // NOLINT
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## LL
|
||||
#define INT64_C(x) x##LL
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## ULL
|
||||
#define UINT64_C(x) x##ULL
|
||||
#endif
|
||||
#define INT64_F "ll"
|
||||
#endif // __LP64__
|
||||
@ -58,15 +58,15 @@ typedef long long int64; // NOLINT
|
||||
typedef unsigned int uint32;
|
||||
typedef int int32;
|
||||
typedef unsigned short uint16; // NOLINT
|
||||
typedef short int16; // NOLINT
|
||||
typedef short int16; // NOLINT
|
||||
typedef unsigned char uint8;
|
||||
typedef signed char int8;
|
||||
#endif // INT_TYPES_DEFINED
|
||||
#endif // GG_LONGLONG
|
||||
|
||||
// Detect compiler is for x86 or x64.
|
||||
#if defined(__x86_64__) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(_M_IX86)
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(_M_IX86)
|
||||
#define CPU_X86 1
|
||||
#endif
|
||||
// Detect compiler is for ARM.
|
||||
@ -76,12 +76,12 @@ typedef signed char int8;
|
||||
|
||||
#ifndef ALIGNP
|
||||
#ifdef __cplusplus
|
||||
#define ALIGNP(p, t) \
|
||||
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
|
||||
((t) - 1)) & ~((t) - 1))))
|
||||
#define ALIGNP(p, t) \
|
||||
reinterpret_cast<uint8*>( \
|
||||
((reinterpret_cast<uintptr_t>(p) + ((t)-1)) & ~((t)-1)))
|
||||
#else
|
||||
#define ALIGNP(p, t) \
|
||||
((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */
|
||||
(uint8*)((((uintptr_t)(p) + ((t)-1)) & ~((t)-1))) /* NOLINT */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -95,9 +95,9 @@ typedef signed char int8;
|
||||
#define LIBYUV_API
|
||||
#endif // LIBYUV_BUILDING_SHARED_LIBRARY
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
|
||||
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
|
||||
defined(LIBYUV_USING_SHARED_LIBRARY))
|
||||
#define LIBYUV_API __attribute__ ((visibility ("default")))
|
||||
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
|
||||
defined(LIBYUV_USING_SHARED_LIBRARY))
|
||||
#define LIBYUV_API __attribute__((visibility("default")))
|
||||
#else
|
||||
#define LIBYUV_API
|
||||
#endif // __GNUC__
|
||||
@ -108,10 +108,9 @@ typedef signed char int8;
|
||||
#define LIBYUV_TRUE 1
|
||||
|
||||
// Visual C x86 or GCC little endian.
|
||||
#if defined(__x86_64__) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__arm__) || defined(_M_ARM) || \
|
||||
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
|
||||
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
|
||||
@ -29,13 +29,15 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
|
||||
|
||||
// Sum Square Error - used to compute Mean Square Error or PSNR.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a,
|
||||
const uint8* src_b, int count);
|
||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count);
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
static const int kMaxPsnr = 128;
|
||||
|
||||
@ -43,32 +45,52 @@ LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
double CalcFramePsnr(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
double I420Psnr(const uint8* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
double CalcFrameSsim(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
double I420Ssim(const uint8* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -30,8 +30,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
|
||||
_MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
@ -42,8 +42,8 @@ extern "C" {
|
||||
#endif // clang >= 3.4
|
||||
#endif // __clang__
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_HASHDJB2_AVX2
|
||||
#endif
|
||||
|
||||
|
||||
@ -16,8 +16,8 @@
|
||||
#include "libyuv/rotate.h" // For enum RotationMode.
|
||||
|
||||
// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
|
||||
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
|
||||
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
|
||||
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
|
||||
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
|
||||
#include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -27,185 +27,295 @@ extern "C" {
|
||||
|
||||
// Convert I444 to I420.
|
||||
LIBYUV_API
|
||||
int I444ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I444ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to I420.
|
||||
LIBYUV_API
|
||||
int I422ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I422ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I420 to I420.
|
||||
#define I420ToI420 I420Copy
|
||||
LIBYUV_API
|
||||
int I420Copy(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420Copy(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I400 (grey) to I420.
|
||||
LIBYUV_API
|
||||
int I400ToI420(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I400ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#define J400ToJ420 I400ToI420
|
||||
|
||||
// Convert NV12 to I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int NV12ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to I420.
|
||||
LIBYUV_API
|
||||
int NV21ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int NV21ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert YUY2 to I420.
|
||||
LIBYUV_API
|
||||
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int YUY2ToI420(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert UYVY to I420.
|
||||
LIBYUV_API
|
||||
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int UYVYToI420(const uint8* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert M420 to I420.
|
||||
LIBYUV_API
|
||||
int M420ToI420(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int M420ToI420(const uint8* src_m420,
|
||||
int src_stride_m420,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert Android420 to I420.
|
||||
LIBYUV_API
|
||||
int Android420ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int Android420ToI420(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
int pixel_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ARGB little endian (bgra in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int BGRAToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ABGRToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGBAToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB little endian (bgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGB24ToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RAWToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RAWToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGB565ToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGB1555ToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGB4444ToI420(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_width/height provided by capture.
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
int MJPGToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height);
|
||||
int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height);
|
||||
#endif
|
||||
|
||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||
@ -231,13 +341,20 @@ int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
// "format" is a fourcc. ie 'I420', 'YUY2'
|
||||
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToI420(const uint8* src_frame,
|
||||
size_t src_size,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
|
||||
|
||||
@ -30,246 +30,384 @@ extern "C" {
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopy(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to ARGB.
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Duplicate prototype for function in convert_from.h for remoting.
|
||||
LIBYUV_API
|
||||
int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I422ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I444 to ARGB.
|
||||
LIBYUV_API
|
||||
int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I444ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J444 to ARGB.
|
||||
LIBYUV_API
|
||||
int J444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J444ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I444 to ABGR.
|
||||
LIBYUV_API
|
||||
int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int I444ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int attenuate);
|
||||
int I420AlphaToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
const uint8* src_a,
|
||||
int src_stride_a,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
int attenuate);
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ABGR.
|
||||
LIBYUV_API
|
||||
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height, int attenuate);
|
||||
int I420AlphaToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
const uint8* src_a,
|
||||
int src_stride_a,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height,
|
||||
int attenuate);
|
||||
|
||||
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I400ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J400 (jpeg grey) to ARGB.
|
||||
LIBYUV_API
|
||||
int J400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J400ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias.
|
||||
#define YToARGB I400ToARGB
|
||||
|
||||
// Convert NV12 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int NV12ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int NV21ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert M420 to ARGB.
|
||||
LIBYUV_API
|
||||
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int M420ToARGB(const uint8* src_m420,
|
||||
int src_stride_m420,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert YUY2 to ARGB.
|
||||
LIBYUV_API
|
||||
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int YUY2ToARGB(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert UYVY to ARGB.
|
||||
LIBYUV_API
|
||||
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int UYVYToARGB(const uint8* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J420 to ARGB.
|
||||
LIBYUV_API
|
||||
int J420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J420ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J422 to ARGB.
|
||||
LIBYUV_API
|
||||
int J422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J422ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J420 to ABGR.
|
||||
LIBYUV_API
|
||||
int J420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int J420ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J422 to ABGR.
|
||||
LIBYUV_API
|
||||
int J422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int J422ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H420 to ARGB.
|
||||
LIBYUV_API
|
||||
int H420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int H420ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H422 to ARGB.
|
||||
LIBYUV_API
|
||||
int H422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int H422ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H420 to ABGR.
|
||||
LIBYUV_API
|
||||
int H420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int H420ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H422 to ABGR.
|
||||
LIBYUV_API
|
||||
int H422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int H422ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int BGRAToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ABGRToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGBAToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Deprecated function name.
|
||||
#define BG24ToARGB RGB24ToARGB
|
||||
|
||||
// RGB little endian (bgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGB24ToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RAWToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGB565ToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGB1555ToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGB4444ToARGB(const uint8* src_frame,
|
||||
int src_stride_frame,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_width/height provided by capture
|
||||
// dst_width/height are used for clipping and determine the final size.
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
int MJPGToARGB(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
#endif
|
||||
|
||||
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
|
||||
@ -295,11 +433,16 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
|
||||
// "format" is a fourcc, e.g. 'I420', 'YUY2'.
|
||||
// Returns 0 on success; -1 for an invalid parameter; other non-zero values on failure.
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToARGB(const uint8* src_frame,
|
||||
size_t src_size,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
|
||||
|
||||
@ -24,142 +24,237 @@ extern "C" {
|
||||
// I420Copy in convert to I420ToI420.
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI422(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420ToI422(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420ToI444(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int I400Copy(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV12(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int I420ToNV12(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV21(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int I420ToNV21(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToYUY2(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToUYVY(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToARGB(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToBGRA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
int I420ToRGBA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToRGB24(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToRAW(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToRGB565(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
// Values in dither matrix from 0 to 7 recommended.
|
||||
// The dither matrix is ordered so that the first byte is the upper left.
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
const uint8* dither4x4, int width, int height);
|
||||
int I420ToRGB565Dither(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
const uint8* dither4x4,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToARGB1555(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToARGB4444(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to specified format.
|
||||
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
|
||||
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
|
||||
LIBYUV_API
|
||||
int ConvertFromI420(const uint8* y, int y_stride,
|
||||
const uint8* u, int u_stride,
|
||||
const uint8* v, int v_stride,
|
||||
uint8* dst_sample, int dst_sample_stride,
|
||||
int width, int height,
|
||||
int ConvertFromI420(const uint8* y,
|
||||
int y_stride,
|
||||
const uint8* u,
|
||||
int u_stride,
|
||||
const uint8* v,
|
||||
int v_stride,
|
||||
uint8* dst_sample,
|
||||
int dst_sample_stride,
|
||||
int width,
|
||||
int height,
|
||||
uint32 format);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -21,45 +21,66 @@ extern "C" {
|
||||
// Copy ARGB to ARGB.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopy(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To BGRA.
|
||||
LIBYUV_API
|
||||
int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height);
|
||||
int ARGBToBGRA(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ABGR.
|
||||
LIBYUV_API
|
||||
int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int ARGBToABGR(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGBA.
|
||||
LIBYUV_API
|
||||
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
int ARGBToRGBA(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height);
|
||||
int ARGBToRGB24(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
LIBYUV_API
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb, int dst_stride_rgb,
|
||||
int width, int height);
|
||||
int ARGBToRAW(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb,
|
||||
int dst_stride_rgb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
int ARGBToRGB565(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
// Values in dither matrix from 0 to 7 recommended.
|
||||
@ -67,112 +88,174 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
|
||||
// const uint8(*dither)[4][4];
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
const uint8* dither4x4, int width, int height);
|
||||
int ARGBToRGB565Dither(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8* dither4x4,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ARGB1555.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb1555, int dst_stride_argb1555,
|
||||
int width, int height);
|
||||
int ARGBToARGB1555(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb1555,
|
||||
int dst_stride_argb1555,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ARGB4444.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb4444, int dst_stride_argb4444,
|
||||
int width, int height);
|
||||
int ARGBToARGB4444(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb4444,
|
||||
int dst_stride_argb4444,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I444.
|
||||
LIBYUV_API
|
||||
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI444(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I422.
|
||||
LIBYUV_API
|
||||
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI422(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I420. (also in convert.h)
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI420(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J420 (JPEG full range I420).
|
||||
LIBYUV_API
|
||||
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToJ420(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J422.
|
||||
LIBYUV_API
|
||||
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToJ422(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J400 (JPEG full range).
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
int width, int height);
|
||||
int ARGBToJ400(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to I400.
|
||||
LIBYUV_API
|
||||
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int ARGBToI400(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB.)
|
||||
LIBYUV_API
|
||||
int ARGBToG(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_g, int dst_stride_g,
|
||||
int width, int height);
|
||||
int ARGBToG(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_g,
|
||||
int dst_stride_g,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV12.
|
||||
LIBYUV_API
|
||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int ARGBToNV12(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int ARGBToNV21(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int ARGBToNV21(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To YUY2.
|
||||
LIBYUV_API
|
||||
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height);
|
||||
int ARGBToYUY2(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To UYVY.
|
||||
LIBYUV_API
|
||||
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height);
|
||||
int ARGBToUYVY(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -12,88 +12,86 @@
|
||||
#define INCLUDE_LIBYUV_MACROS_MSA_H_
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include <stdint.h>
|
||||
#include <msa.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if (__mips_isa_rev >= 6)
|
||||
#define LW(psrc) ({ \
|
||||
uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint32 val_m; \
|
||||
asm volatile ( \
|
||||
"lw %[val_m], %[psrc_lw_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_lw_m] "m" (*psrc_lw_m) \
|
||||
); \
|
||||
val_m; \
|
||||
#define LW(psrc) \
|
||||
({ \
|
||||
uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint32 val_m; \
|
||||
asm volatile("lw %[val_m], %[psrc_lw_m] \n\t" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_lw_m] "m"(*psrc_lw_m)); \
|
||||
val_m; \
|
||||
})
|
||||
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) ({ \
|
||||
uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint64 val_m = 0; \
|
||||
asm volatile ( \
|
||||
"ld %[val_m], %[psrc_ld_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_ld_m] "m" (*psrc_ld_m) \
|
||||
); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) ({ \
|
||||
uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint32 val0_m, val1_m; \
|
||||
uint64 val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64) (val1_m); /* NOLINT */ \
|
||||
val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
#else // !(__mips_isa_rev >= 6)
|
||||
#define LW(psrc) ({ \
|
||||
uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint32 val_m; \
|
||||
asm volatile ( \
|
||||
"ulw %[val_m], %[psrc_lw_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_lw_m] "m" (*psrc_lw_m) \
|
||||
); \
|
||||
val_m; \
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint64 val_m = 0; \
|
||||
asm volatile("ld %[val_m], %[psrc_ld_m] \n\t" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_ld_m] "m"(*psrc_ld_m)); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint32 val0_m, val1_m; \
|
||||
uint64 val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64)(val1_m); /* NOLINT */ \
|
||||
val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
#else // !(__mips_isa_rev >= 6)
|
||||
#define LW(psrc) \
|
||||
({ \
|
||||
uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint32 val_m; \
|
||||
asm volatile("ulw %[val_m], %[psrc_lw_m] \n\t" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_lw_m] "m"(*psrc_lw_m)); \
|
||||
val_m; \
|
||||
})
|
||||
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) ({ \
|
||||
uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint64 val_m = 0; \
|
||||
asm volatile ( \
|
||||
"uld %[val_m], %[psrc_ld_m] \n\t" \
|
||||
: [val_m] "=r" (val_m) \
|
||||
: [psrc_ld_m] "m" (*psrc_ld_m) \
|
||||
); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) ({ \
|
||||
uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
||||
uint32 val0_m, val1_m; \
|
||||
uint64 val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64) (val1_m); /* NOLINT */ \
|
||||
val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint64 val_m = 0; \
|
||||
asm volatile("uld %[val_m], %[psrc_ld_m] \n\t" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_ld_m] "m"(*psrc_ld_m)); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \
|
||||
uint32 val0_m, val1_m; \
|
||||
uint64 val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64)(val1_m); /* NOLINT */ \
|
||||
val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
#endif // (__mips_isa_rev >= 6)
|
||||
|
||||
// TODO(fbarchard): Consider removing __VA_ARGS__ versions.
|
||||
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
|
||||
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
|
||||
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
|
||||
|
||||
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
|
||||
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
|
||||
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
|
||||
|
||||
/* Description : Load two vectors with 16 'byte' sized elements
|
||||
@ -103,16 +101,18 @@
|
||||
Details : Load 16 byte elements in 'out0' from (psrc)
|
||||
Load 16 byte elements in 'out1' from (psrc + stride)
|
||||
*/
|
||||
#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
|
||||
out0 = LD_B(RTYPE, (psrc)); \
|
||||
out1 = LD_B(RTYPE, (psrc) + stride); \
|
||||
}
|
||||
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
|
||||
{ \
|
||||
out0 = LD_B(RTYPE, (psrc)); \
|
||||
out1 = LD_B(RTYPE, (psrc) + stride); \
|
||||
}
|
||||
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
|
||||
}
|
||||
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
|
||||
{ \
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
|
||||
}
|
||||
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
|
||||
|
||||
/* Description : Store two vectors with stride each having 16 'byte' sized
|
||||
@ -121,16 +121,18 @@
|
||||
Details : Store 16 byte elements from 'in0' to (pdst)
|
||||
Store 16 byte elements from 'in1' to (pdst + stride)
|
||||
*/
|
||||
#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
|
||||
ST_B(RTYPE, in0, (pdst)); \
|
||||
ST_B(RTYPE, in1, (pdst) + stride); \
|
||||
}
|
||||
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
|
||||
{ \
|
||||
ST_B(RTYPE, in0, (pdst)); \
|
||||
ST_B(RTYPE, in1, (pdst) + stride); \
|
||||
}
|
||||
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
|
||||
ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
||||
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
||||
}
|
||||
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
|
||||
{ \
|
||||
ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
||||
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
||||
}
|
||||
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
|
||||
|
||||
// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
|
||||
@ -141,10 +143,11 @@
|
||||
Details : Byte elements from 'in0' & 'in1' are copied selectively to
|
||||
'out0' as per control vector 'mask0'
|
||||
*/
|
||||
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
|
||||
out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
|
||||
out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
|
||||
}
|
||||
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
|
||||
{ \
|
||||
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
|
||||
out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
|
||||
}
|
||||
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
/* Description : Interleave both left and right half of input vectors
|
||||
@ -154,12 +157,13 @@
|
||||
Details : Right half of byte elements from 'in0' and 'in1' are
|
||||
interleaved and written to 'out0'
|
||||
*/
|
||||
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \
|
||||
out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
|
||||
out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
|
||||
}
|
||||
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
|
||||
{ \
|
||||
out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
|
||||
out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
|
||||
}
|
||||
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||
|
||||
#endif // INCLUDE_LIBYUV_MACROS_MSA_H_
|
||||
|
||||
@ -144,12 +144,16 @@ class LIBYUV_API MJpegDecoder {
|
||||
// callback function. Each call will get the data for a whole number of
|
||||
// image scanlines.
|
||||
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
|
||||
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height);
|
||||
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn,
|
||||
void* opaque,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
static JpegSubsamplingType JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components);
|
||||
int* subsample_x,
|
||||
int* subsample_y,
|
||||
int number_of_components);
|
||||
|
||||
private:
|
||||
void AllocOutputBuffers(int num_outbufs);
|
||||
|
||||
@ -24,105 +24,164 @@ extern "C" {
|
||||
|
||||
// Copy a plane of data.
|
||||
LIBYUV_API
|
||||
void CopyPlane(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
void CopyPlane(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void CopyPlane_16(const uint16* src_y, int src_stride_y,
|
||||
uint16* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
void CopyPlane_16(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
uint16* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Set a plane of data to a 32 bit value.
|
||||
LIBYUV_API
|
||||
void SetPlane(uint8* dst_y, int dst_stride_y,
|
||||
int width, int height,
|
||||
void SetPlane(uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height,
|
||||
uint32 value);
|
||||
|
||||
// Split interleaved UV plane into separate U and V planes.
|
||||
LIBYUV_API
|
||||
void SplitUVPlane(const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
void SplitUVPlane(const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Merge separate U and V planes into one interleaved UV plane.
|
||||
LIBYUV_API
|
||||
void MergeUVPlane(const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
void MergeUVPlane(const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I400. Supports inverting.
|
||||
LIBYUV_API
|
||||
int I400ToI400(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int I400ToI400(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#define J400ToJ400 I400ToI400
|
||||
|
||||
// Copy I422 to I422.
|
||||
#define I422ToI422 I422Copy
|
||||
LIBYUV_API
|
||||
int I422Copy(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I422Copy(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I444 to I444.
|
||||
#define I444ToI444 I444Copy
|
||||
LIBYUV_API
|
||||
int I444Copy(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I444Copy(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert YUY2 to I422.
|
||||
LIBYUV_API
|
||||
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int YUY2ToI422(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert UYVY to I422.
|
||||
LIBYUV_API
|
||||
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int UYVYToI422(const uint8* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int YUY2ToNV12(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int UYVYToNV12(const uint8* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int YUY2ToY(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int YUY2ToY(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to I400. (calls CopyPlane ignoring u/v).
|
||||
LIBYUV_API
|
||||
int I420ToI400(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int I420ToI400(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias
|
||||
#define J420ToJ400 I420ToI400
|
||||
@ -130,13 +189,20 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// I420 mirror.
|
||||
LIBYUV_API
|
||||
int I420Mirror(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420Mirror(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias
|
||||
#define I400ToI400Mirror I400Mirror
|
||||
@ -144,87 +210,139 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
|
||||
// I400 mirror. A single plane is mirrored horizontally.
|
||||
// Pass negative height to achieve 180 degree rotation.
|
||||
LIBYUV_API
|
||||
int I400Mirror(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int I400Mirror(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias
|
||||
#define ARGBToARGBMirror ARGBMirror
|
||||
|
||||
// ARGB mirror.
|
||||
LIBYUV_API
|
||||
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBMirror(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV12 to RGB565.
|
||||
LIBYUV_API
|
||||
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
int NV12ToRGB565(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// I422ToARGB is in convert_argb.h
|
||||
// Convert I422 to BGRA.
|
||||
LIBYUV_API
|
||||
int I422ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height);
|
||||
int I422ToBGRA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to ABGR.
|
||||
LIBYUV_API
|
||||
int I422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int I422ToABGR(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to RGBA.
|
||||
LIBYUV_API
|
||||
int I422ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
int I422ToRGBA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias
|
||||
#define RGB24ToRAW RAWToRGB24
|
||||
|
||||
LIBYUV_API
|
||||
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height);
|
||||
int RAWToRGB24(const uint8* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Draw a rectangle into I420.
|
||||
LIBYUV_API
|
||||
int I420Rect(uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int x, int y, int width, int height,
|
||||
int value_y, int value_u, int value_v);
|
||||
int I420Rect(uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height,
|
||||
int value_y,
|
||||
int value_u,
|
||||
int value_v);
|
||||
|
||||
// Draw a rectangle into ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
|
||||
int x, int y, int width, int height, uint32 value);
|
||||
int ARGBRect(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height,
|
||||
uint32 value);
|
||||
|
||||
// Convert ARGB to gray scale ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBGrayTo(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Make a rectangle of ARGB gray scale.
|
||||
LIBYUV_API
|
||||
int ARGBGray(uint8* dst_argb, int dst_stride_argb,
|
||||
int x, int y, int width, int height);
|
||||
int ARGBGray(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Make a rectangle of ARGB Sepia tone.
|
||||
LIBYUV_API
|
||||
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
|
||||
int x, int y, int width, int height);
|
||||
int ARGBSepia(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Apply a matrix rotation to each ARGB pixel.
|
||||
// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2.
|
||||
@ -233,10 +351,13 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
|
||||
// The next 4 coefficients apply to B, G, R, A and produce R of the output.
|
||||
// The last 4 coefficients apply to B, G, R, A and produce A of the output.
|
||||
LIBYUV_API
|
||||
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int ARGBColorMatrix(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const int8* matrix_argb,
|
||||
int width, int height);
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Deprecated. Use ARGBColorMatrix instead.
|
||||
// Apply a matrix rotation to each ARGB pixel.
|
||||
@ -245,32 +366,47 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
|
||||
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
|
||||
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
|
||||
LIBYUV_API
|
||||
int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
|
||||
int RGBColorMatrix(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const int8* matrix_rgb,
|
||||
int x, int y, int width, int height);
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Apply a color table to each ARGB pixel.
|
||||
// Table contains 256 ARGB values.
|
||||
LIBYUV_API
|
||||
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
|
||||
int ARGBColorTable(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const uint8* table_argb,
|
||||
int x, int y, int width, int height);
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Apply a color table to each ARGB pixel but preserve destination alpha.
|
||||
// Table contains 256 ARGB values.
|
||||
LIBYUV_API
|
||||
int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
|
||||
int RGBColorTable(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const uint8* table_argb,
|
||||
int x, int y, int width, int height);
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Apply a luma/color table to each ARGB pixel but preserve destination alpha.
|
||||
// Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from
|
||||
// RGB (YJ style) and C is an 8 bit color component (R, G or B).
|
||||
LIBYUV_API
|
||||
int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int ARGBLumaColorTable(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const uint8* luma_rgb_table,
|
||||
int width, int height);
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Apply a 3 term polynomial to ARGB values.
|
||||
// poly points to a 4x4 matrix. The first row is constants. The 2nd row is
|
||||
@ -281,54 +417,80 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
|
||||
// A polynomial approximation can be derived using software such as 'R'.
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int ARGBPolynomial(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const float* poly,
|
||||
int width, int height);
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert plane of 16 bit shorts to half floats.
|
||||
// Source values are multiplied by scale before storing as half float.
|
||||
LIBYUV_API
|
||||
int HalfFloatPlane(const uint16* src_y, int src_stride_y,
|
||||
uint16* dst_y, int dst_stride_y,
|
||||
int HalfFloatPlane(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
uint16* dst_y,
|
||||
int dst_stride_y,
|
||||
float scale,
|
||||
int width, int height);
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Quantize a rectangle of ARGB. Alpha unaffected.
|
||||
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
|
||||
// interval_size should be a value between 1 and 255.
|
||||
// interval_offset should be a value between 0 and 255.
|
||||
LIBYUV_API
|
||||
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
|
||||
int scale, int interval_size, int interval_offset,
|
||||
int x, int y, int width, int height);
|
||||
int ARGBQuantize(uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int scale,
|
||||
int interval_size,
|
||||
int interval_offset,
|
||||
int x,
|
||||
int y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopy(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy Alpha channel of ARGB to alpha of ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopyAlpha(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Extract the alpha channel from ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
int width, int height);
|
||||
int ARGBExtractAlpha(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy Y channel to Alpha of ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopyYToAlpha(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width);
|
||||
typedef void (*ARGBBlendRow)(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width);
|
||||
|
||||
// Get function to Alpha Blend ARGB pixels and store to destination.
|
||||
LIBYUV_API
|
||||
@ -338,92 +500,143 @@ ARGBBlendRow GetARGBBlend();
|
||||
// Source is pre-multiplied by alpha using ARGBAttenuate.
|
||||
// Alpha of destination is set to 255.
|
||||
LIBYUV_API
|
||||
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
|
||||
const uint8* src_argb1, int src_stride_argb1,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBBlend(const uint8* src_argb0,
|
||||
int src_stride_argb0,
|
||||
const uint8* src_argb1,
|
||||
int src_stride_argb1,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alpha Blend plane and store to destination.
|
||||
// Source is not pre-multiplied by alpha.
|
||||
LIBYUV_API
|
||||
int BlendPlane(const uint8* src_y0, int src_stride_y0,
|
||||
const uint8* src_y1, int src_stride_y1,
|
||||
const uint8* alpha, int alpha_stride,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int BlendPlane(const uint8* src_y0,
|
||||
int src_stride_y0,
|
||||
const uint8* src_y1,
|
||||
int src_stride_y1,
|
||||
const uint8* alpha,
|
||||
int alpha_stride,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alpha Blend YUV images and store to destination.
|
||||
// Source is not pre-multiplied by alpha.
|
||||
// Alpha is full width x height and subsampled to half size to apply to UV.
|
||||
LIBYUV_API
|
||||
int I420Blend(const uint8* src_y0, int src_stride_y0,
|
||||
const uint8* src_u0, int src_stride_u0,
|
||||
const uint8* src_v0, int src_stride_v0,
|
||||
const uint8* src_y1, int src_stride_y1,
|
||||
const uint8* src_u1, int src_stride_u1,
|
||||
const uint8* src_v1, int src_stride_v1,
|
||||
const uint8* alpha, int alpha_stride,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420Blend(const uint8* src_y0,
|
||||
int src_stride_y0,
|
||||
const uint8* src_u0,
|
||||
int src_stride_u0,
|
||||
const uint8* src_v0,
|
||||
int src_stride_v0,
|
||||
const uint8* src_y1,
|
||||
int src_stride_y1,
|
||||
const uint8* src_u1,
|
||||
int src_stride_u1,
|
||||
const uint8* src_v1,
|
||||
int src_stride_v1,
|
||||
const uint8* alpha,
|
||||
int alpha_stride,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
|
||||
LIBYUV_API
|
||||
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
|
||||
const uint8* src_argb1, int src_stride_argb1,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBMultiply(const uint8* src_argb0,
|
||||
int src_stride_argb0,
|
||||
const uint8* src_argb1,
|
||||
int src_stride_argb1,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Add ARGB image with ARGB image. Saturates to 255.
|
||||
LIBYUV_API
|
||||
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
|
||||
const uint8* src_argb1, int src_stride_argb1,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBAdd(const uint8* src_argb0,
|
||||
int src_stride_argb0,
|
||||
const uint8* src_argb1,
|
||||
int src_stride_argb1,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0.
|
||||
LIBYUV_API
|
||||
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
|
||||
const uint8* src_argb1, int src_stride_argb1,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBSubtract(const uint8* src_argb0,
|
||||
int src_stride_argb0,
|
||||
const uint8* src_argb1,
|
||||
int src_stride_argb1,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to YUY2.
|
||||
LIBYUV_API
|
||||
int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I422ToYUY2(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to UYVY.
|
||||
LIBYUV_API
|
||||
int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I422ToUYVY(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_frame,
|
||||
int dst_stride_frame,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert unattenuated ARGB to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBAttenuate(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert preattenuated ARGB to unattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBUnattenuate(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Internal function - do not call directly.
|
||||
// Computes table of cumulative sum for image where the value is the sum
|
||||
// of all values above and to the left of the entry. Used by ARGBBlur.
|
||||
LIBYUV_API
|
||||
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
|
||||
int32* dst_cumsum, int dst_stride32_cumsum,
|
||||
int width, int height);
|
||||
int ARGBComputeCumulativeSum(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int32* dst_cumsum,
|
||||
int dst_stride32_cumsum,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Blur ARGB image.
|
||||
// dst_cumsum table of width * (height + 1) * 16 bytes aligned to
|
||||
@ -432,49 +645,79 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
|
||||
// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5.
|
||||
// Blur is optimized for radius of 5 (11x11) or less.
|
||||
LIBYUV_API
|
||||
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int32* dst_cumsum, int dst_stride32_cumsum,
|
||||
int width, int height, int radius);
|
||||
int ARGBBlur(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int32* dst_cumsum,
|
||||
int dst_stride32_cumsum,
|
||||
int width,
|
||||
int height,
|
||||
int radius);
|
||||
|
||||
// Multiply ARGB image by ARGB value.
|
||||
LIBYUV_API
|
||||
int ARGBShade(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, uint32 value);
|
||||
int ARGBShade(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
uint32 value);
|
||||
|
||||
// Interpolate between two images using specified amount of interpolation
|
||||
// (0 to 255) and store to destination.
|
||||
// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
|
||||
// and 255 means 1% src0 and 99% src1.
|
||||
LIBYUV_API
|
||||
int InterpolatePlane(const uint8* src0, int src_stride0,
|
||||
const uint8* src1, int src_stride1,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height, int interpolation);
|
||||
int InterpolatePlane(const uint8* src0,
|
||||
int src_stride0,
|
||||
const uint8* src1,
|
||||
int src_stride1,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
int interpolation);
|
||||
|
||||
// Interpolate between two ARGB images using specified amount of interpolation
|
||||
// Internally calls InterpolatePlane with width * 4 (bpp).
|
||||
LIBYUV_API
|
||||
int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
|
||||
const uint8* src_argb1, int src_stride_argb1,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int interpolation);
|
||||
int ARGBInterpolate(const uint8* src_argb0,
|
||||
int src_stride_argb0,
|
||||
const uint8* src_argb1,
|
||||
int src_stride_argb1,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
int interpolation);
|
||||
|
||||
// Interpolate between two YUV images using specified amount of interpolation
|
||||
// Internally calls InterpolatePlane on each plane where the U and V planes
|
||||
// are half width and half height.
|
||||
LIBYUV_API
|
||||
int I420Interpolate(const uint8* src0_y, int src0_stride_y,
|
||||
const uint8* src0_u, int src0_stride_u,
|
||||
const uint8* src0_v, int src0_stride_v,
|
||||
const uint8* src1_y, int src1_stride_y,
|
||||
const uint8* src1_u, int src1_stride_u,
|
||||
const uint8* src1_v, int src1_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height, int interpolation);
|
||||
int I420Interpolate(const uint8* src0_y,
|
||||
int src0_stride_y,
|
||||
const uint8* src0_u,
|
||||
int src0_stride_u,
|
||||
const uint8* src0_v,
|
||||
int src0_stride_v,
|
||||
const uint8* src1_y,
|
||||
int src1_stride_y,
|
||||
const uint8* src1_u,
|
||||
int src1_stride_u,
|
||||
const uint8* src1_v,
|
||||
int src1_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
int interpolation);
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
@ -495,36 +738,55 @@ int I420Interpolate(const uint8* src0_y, int src0_stride_y,
|
||||
// Row function for copying pixels from a source with a slope to a row
|
||||
// of destination. Useful for scaling, rotation, mirror, texture mapping.
|
||||
LIBYUV_API
|
||||
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* uv_dudv, int width);
|
||||
void ARGBAffineRow_C(const uint8* src_argb,
|
||||
int src_argb_stride,
|
||||
uint8* dst_argb,
|
||||
const float* uv_dudv,
|
||||
int width);
|
||||
LIBYUV_API
|
||||
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
|
||||
uint8* dst_argb, const float* uv_dudv, int width);
|
||||
void ARGBAffineRow_SSE2(const uint8* src_argb,
|
||||
int src_argb_stride,
|
||||
uint8* dst_argb,
|
||||
const float* uv_dudv,
|
||||
int width);
|
||||
|
||||
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
|
||||
// shuffler is 16 bytes and must be aligned.
|
||||
LIBYUV_API
|
||||
int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
const uint8* shuffler, int width, int height);
|
||||
int ARGBShuffle(const uint8* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
const uint8* shuffler,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Sobel ARGB effect with planar output.
|
||||
LIBYUV_API
|
||||
int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int ARGBSobelToPlane(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Sobel ARGB effect.
|
||||
LIBYUV_API
|
||||
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBSobel(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBSobelXY(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -20,8 +20,8 @@ extern "C" {
|
||||
|
||||
// Supported rotation.
|
||||
typedef enum RotationMode {
|
||||
kRotate0 = 0, // No rotation.
|
||||
kRotate90 = 90, // Rotate 90 degrees clockwise.
|
||||
kRotate0 = 0, // No rotation.
|
||||
kRotate90 = 90, // Rotate 90 degrees clockwise.
|
||||
kRotate180 = 180, // Rotate 180 degrees.
|
||||
kRotate270 = 270, // Rotate 270 degrees clockwise.
|
||||
|
||||
@ -33,81 +33,128 @@ typedef enum RotationMode {
|
||||
|
||||
// Rotate I420 frame.
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int I420Rotate(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate NV12 input and store in I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int NV12ToI420Rotate(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate a plane by 0, 90, 180, or 270.
|
||||
LIBYUV_API
|
||||
int RotatePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int RotatePlane(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate planes by 90, 180, 270. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotatePlane90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane90(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane180(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane270(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV90(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV90(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Rotations for when U and V are interleaved.
|
||||
// These functions take one input pointer and
|
||||
// split the data into two buffers while
|
||||
// rotating them. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotateUV180(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV180(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV270(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV270(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// The 90 and 270 functions are based on transposes.
|
||||
// Doing a transpose with reversing the read/write
|
||||
// order will result in a rotation by +- 90 degrees.
|
||||
// Deprecated.
|
||||
LIBYUV_API
|
||||
void TransposePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void TransposePlane(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void TransposeUV(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void TransposeUV(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -21,9 +21,13 @@ extern "C" {
|
||||
|
||||
// Rotate ARGB frame
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int ARGBRotate(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
enum RotationMode mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -36,7 +36,8 @@ extern "C" {
|
||||
|
||||
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
|
||||
(defined(__i386__) || \
|
||||
(defined(__x86_64__) && !defined(__native_client__)))
|
||||
#define HAS_TRANSPOSEWX8_SSSE3
|
||||
#endif
|
||||
|
||||
@ -54,64 +55,129 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
||||
defined(__mips__) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
#define HAS_TRANSPOSEWX8_DSPR2
|
||||
#define HAS_TRANSPOSEUVWX8_DSPR2
|
||||
#endif // defined(__mips__)
|
||||
|
||||
void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height);
|
||||
void TransposeWxH_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
|
||||
void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Any_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Any_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Fast_Any_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Any_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
|
||||
void TransposeUVWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void TransposeUVWxH_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_SSE2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
|
||||
void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_Any_SSE2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_Any_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_Any_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
1478
include/libyuv/row.h
1478
include/libyuv/row.h
File diff suppressed because it is too large
Load Diff
@ -20,25 +20,33 @@ extern "C" {
|
||||
|
||||
// Supported filtering.
|
||||
typedef enum FilterMode {
|
||||
kFilterNone = 0, // Point sample; Fastest.
|
||||
kFilterLinear = 1, // Filter horizontally only.
|
||||
kFilterNone = 0, // Point sample; Fastest.
|
||||
kFilterLinear = 1, // Filter horizontally only.
|
||||
kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
|
||||
kFilterBox = 3 // Highest quality.
|
||||
kFilterBox = 3 // Highest quality.
|
||||
} FilterModeEnum;
|
||||
|
||||
// Scale a YUV plane.
|
||||
LIBYUV_API
|
||||
void ScalePlane(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint8* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
void ScalePlane(const uint8* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
LIBYUV_API
|
||||
void ScalePlane_16(const uint16* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint16* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
void ScalePlane_16(const uint16* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint16* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Scales a YUV 4:2:0 image from the src width and height to the
|
||||
@ -52,42 +60,73 @@ void ScalePlane_16(const uint16* src, int src_stride,
|
||||
// Returns 0 if successful.
|
||||
|
||||
LIBYUV_API
|
||||
int I420Scale(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int I420Scale(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
LIBYUV_API
|
||||
int I420Scale_16(const uint16* src_y, int src_stride_y,
|
||||
const uint16* src_u, int src_stride_u,
|
||||
const uint16* src_v, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint16* dst_y, int dst_stride_y,
|
||||
uint16* dst_u, int dst_stride_u,
|
||||
uint16* dst_v, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int I420Scale_16(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint16* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
// Legacy API. Deprecated.
|
||||
LIBYUV_API
|
||||
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
|
||||
int src_stride_y, int src_stride_u, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_y, uint8* dst_u, uint8* dst_v,
|
||||
int dst_stride_y, int dst_stride_u, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int Scale(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_y,
|
||||
int src_stride_u,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_y,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_y,
|
||||
int dst_stride_u,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
LIBYUV_BOOL interpolate);
|
||||
|
||||
// Legacy API. Deprecated.
|
||||
LIBYUV_API
|
||||
int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
|
||||
uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
|
||||
int ScaleOffset(const uint8* src_i420,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_i420,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int dst_yoffset,
|
||||
LIBYUV_BOOL interpolate);
|
||||
|
||||
// For testing, allow disabling of specialized scalers.
|
||||
|
||||
@ -20,32 +20,52 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int ARGBScale(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Clipped scale takes destination rectangle coordinates for clip values.
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int ARGBScaleClip(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int YUVToARGBScaleClip(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@ -45,8 +45,8 @@ extern "C" {
|
||||
#endif // __clang__
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
|
||||
_MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
@ -72,8 +72,9 @@ extern "C" {
|
||||
// The following are available on all x86 platforms, but
|
||||
// require VS2012, clang 3.4 or gcc 4.7.
|
||||
// The code supports NaCL but requires a new compiler and validator.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
|
||||
defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
|
||||
defined(GCC_HAS_AVX2))
|
||||
#define HAS_SCALEADDROW_AVX2
|
||||
#define HAS_SCALEROWDOWN2_AVX2
|
||||
#define HAS_SCALEROWDOWN4_AVX2
|
||||
@ -104,22 +105,36 @@ extern "C" {
|
||||
|
||||
// Scale ARGB vertically with bilinear interpolation.
|
||||
void ScalePlaneVertical(int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int y, int dy,
|
||||
int bpp, enum FilterMode filtering);
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int y,
|
||||
int dy,
|
||||
int bpp,
|
||||
enum FilterMode filtering);
|
||||
|
||||
void ScalePlaneVertical_16(int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint16* src_argb, uint16* dst_argb,
|
||||
int x, int y, int dy,
|
||||
int wpp, enum FilterMode filtering);
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint16* src_argb,
|
||||
uint16* dst_argb,
|
||||
int x,
|
||||
int y,
|
||||
int dy,
|
||||
int wpp,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Simplify the filtering based on scale factors.
|
||||
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
enum FilterMode ScaleFilterReduce(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
|
||||
@ -137,363 +152,647 @@ int FixedDiv1_X86(int num, int div);
|
||||
#endif
|
||||
|
||||
// Compute slope values for stepping.
|
||||
void ScaleSlope(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
void ScaleSlope(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering,
|
||||
int* x, int* y, int* dx, int* dy);
|
||||
int* x,
|
||||
int* y,
|
||||
int* dx,
|
||||
int* dy);
|
||||
|
||||
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width);
|
||||
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* d, int dst_width);
|
||||
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width);
|
||||
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* d, int dst_width);
|
||||
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int, int);
|
||||
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int, int);
|
||||
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width);
|
||||
void ScaleRowDown2_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* d,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* d,
|
||||
int dst_width);
|
||||
void ScaleCols_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleCols_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleColsUp2_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int,
|
||||
int);
|
||||
void ScaleColsUp2_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int,
|
||||
int);
|
||||
void ScaleFilterCols_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleFilterCols_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleFilterCols64_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleFilterCols64_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleRowDown38_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int dst_width);
|
||||
uint16* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width);
|
||||
void ScaleARGBRowDown2_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Box_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int, int);
|
||||
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBCols_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBCols64_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBColsUp2_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int,
|
||||
int);
|
||||
void ScaleARGBFilterCols_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBFilterCols64_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
// Specialized scalers for x86.
|
||||
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_Any_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleColsUp2_SSE2(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
// ARGB Column functions
|
||||
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleARGBCols_SSE2(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
void ScaleARGBCols_Any_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
// ARGB Row functions
|
||||
void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx, uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width);
|
||||
uint8* dst_argb,
|
||||
int dst_width);
|
||||
|
||||
// ScaleRowDown2Box also used by planar functions
|
||||
// NEON downscalers with interpolation.
|
||||
|
||||
// Note - not static due to reuse in convert for 444 to 420.
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
|
||||
// to load up the every 4th pixel into a 4 different registers.
|
||||
// Point samples 32 pixels to 24 pixels.
|
||||
void ScaleRowDown34_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
// 32 -> 12
|
||||
void ScaleRowDown38_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
// 32x3 -> 12x1
|
||||
void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
// 32x2 -> 12x1
|
||||
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
// 32 -> 12
|
||||
void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
// 32x3 -> 12x1
|
||||
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
// 32x2 -> 12x1
|
||||
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width);
|
||||
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx);
|
||||
void ScaleFilterCols_Any_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx);
|
||||
|
||||
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width);
|
||||
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width);
|
||||
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width);
|
||||
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
void ScaleRowDown2_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width);
|
||||
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@ -28,13 +28,13 @@ extern "C" {
|
||||
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
|
||||
// constants are used in a switch.
|
||||
#ifdef __cplusplus
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
|
||||
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
|
||||
#define FOURCC(a, b, c, d) \
|
||||
((static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
|
||||
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
|
||||
#else
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
|
||||
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
|
||||
#define FOURCC(a, b, c, d) \
|
||||
(((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
|
||||
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
|
||||
#endif
|
||||
|
||||
// Some pages discussing FourCC codes:
|
||||
@ -69,7 +69,7 @@ enum FourCC {
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
|
||||
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
|
||||
@ -137,7 +137,7 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_ABGR = 32,
|
||||
FOURCC_BPP_RGBA = 32,
|
||||
FOURCC_BPP_24BG = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RGBP = 16,
|
||||
FOURCC_BPP_RGBO = 16,
|
||||
FOURCC_BPP_R444 = 16,
|
||||
@ -170,7 +170,7 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_CM24 = 24,
|
||||
|
||||
// Match any fourcc.
|
||||
FOURCC_BPP_ANY = 0, // 0 means unknown.
|
||||
FOURCC_BPP_ANY = 0, // 0 means unknown.
|
||||
};
|
||||
|
||||
// Converts fourcc aliases into canonical ones.
|
||||
|
||||
@ -32,8 +32,7 @@ LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
int remainder;
|
||||
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
|
||||
HashDjb2_C;
|
||||
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
|
||||
#if defined(HAS_HASHDJB2_SSE41)
|
||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||
HashDjb2_SSE = HashDjb2_SSE41;
|
||||
@ -50,13 +49,13 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
}
|
||||
remainder = (int)(count) & ~15;
|
||||
remainder = (int)count & ~15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_SSE(src, remainder, seed);
|
||||
src += remainder;
|
||||
count -= remainder;
|
||||
}
|
||||
remainder = (int)(count) & 15;
|
||||
remainder = (int)count & 15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_C(src, remainder, seed);
|
||||
}
|
||||
@ -113,7 +112,8 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
|
||||
|
||||
// TODO(fbarchard): Refactor into row function.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
uint64 ComputeSumSquareError(const uint8* src_a,
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
// SumSquareError returns values 0 to 65535 for each squared difference.
|
||||
// Up to 65536 of those can be summed and remain within a uint32.
|
||||
@ -142,7 +142,7 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
}
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: sse)
|
||||
#pragma omp parallel for reduction(+ : sse)
|
||||
#endif
|
||||
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
|
||||
@ -162,14 +162,16 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
uint64 sse = 0;
|
||||
int h;
|
||||
// Coalesce rows.
|
||||
if (stride_a == width &&
|
||||
stride_b == width) {
|
||||
if (stride_a == width && stride_b == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
stride_a = stride_b = 0;
|
||||
@ -186,10 +188,10 @@ LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
|
||||
double psnr;
|
||||
if (sse > 0) {
|
||||
double mse = (double)(count) / (double)(sse);
|
||||
double mse = (double)count / (double)sse;
|
||||
psnr = 10.0 * log10(255.0 * 255.0 * mse);
|
||||
} else {
|
||||
psnr = kMaxPsnr; // Limit to prevent divide by 0
|
||||
psnr = kMaxPsnr; // Limit to prevent divide by 0
|
||||
}
|
||||
|
||||
if (psnr > kMaxPsnr)
|
||||
@ -199,45 +201,53 @@ double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
double CalcFramePsnr(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
const uint64 samples = width * height;
|
||||
const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
|
||||
src_b, stride_b,
|
||||
width, height);
|
||||
const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
|
||||
stride_b, width, height);
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b,
|
||||
width, height);
|
||||
double I420Psnr(const uint8* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height) {
|
||||
const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b,
|
||||
stride_y_b, width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 sse_u = ComputeSumSquareErrorPlane(
|
||||
src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv);
|
||||
const uint64 sse_v = ComputeSumSquareErrorPlane(
|
||||
src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
|
||||
const uint64 samples = width * height + 2 * (width_uv * height_uv);
|
||||
const uint64 sse = sse_y + sse_u + sse_v;
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
|
||||
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
|
||||
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
|
||||
|
||||
static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) {
|
||||
static double Ssim8x8_C(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b) {
|
||||
int64 sum_a = 0;
|
||||
int64 sum_b = 0;
|
||||
int64 sum_sq_a = 0;
|
||||
@ -270,12 +280,12 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
|
||||
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
|
||||
|
||||
const int64 sum_a_sq = sum_a*sum_a;
|
||||
const int64 sum_b_sq = sum_b*sum_b;
|
||||
const int64 sum_a_sq = sum_a * sum_a;
|
||||
const int64 sum_b_sq = sum_b * sum_b;
|
||||
|
||||
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq +
|
||||
count * sum_sq_b - sum_b_sq + c2);
|
||||
const int64 ssim_d =
|
||||
(sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
|
||||
|
||||
if (ssim_d == 0.0) {
|
||||
return DBL_MAX;
|
||||
@ -288,13 +298,16 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
|
||||
// block boundaries to penalize blocking artifacts.
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
double CalcFrameSsim(const uint8* src_a,
|
||||
int stride_a,
|
||||
const uint8* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int samples = 0;
|
||||
double ssim_total = 0;
|
||||
double (*Ssim8x8)(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) = Ssim8x8_C;
|
||||
double (*Ssim8x8)(const uint8* src_a, int stride_a, const uint8* src_b,
|
||||
int stride_b) = Ssim8x8_C;
|
||||
|
||||
// sample point start with each 4x4 location
|
||||
int i;
|
||||
@ -314,22 +327,27 @@ double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b, width, height);
|
||||
double I420Ssim(const uint8* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height) {
|
||||
const double ssim_y =
|
||||
CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
|
||||
}
|
||||
|
||||
@ -62,30 +62,30 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
return sse;
|
||||
}
|
||||
|
||||
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
static uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
|
||||
static uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
static uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
static uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
static uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
@ -148,4 +148,3 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
@ -21,12 +21,12 @@ extern "C" {
|
||||
// This module is for 32 bit Visual C x86 and clangcl
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
|
||||
__declspec(naked)
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
__declspec(naked) uint32
|
||||
SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm5, xmm5
|
||||
|
||||
@ -61,13 +61,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
|
||||
#pragma warning(disable: 4752)
|
||||
__declspec(naked)
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
#pragma warning(disable : 4752)
|
||||
__declspec(naked) uint32
|
||||
SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
vpxor ymm0, ymm0, ymm0 // sum
|
||||
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
|
||||
sub edx, eax
|
||||
@ -101,65 +101,65 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
}
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
|
||||
uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
__declspec(naked)
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
__declspec(naked) uint32
|
||||
HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
movd xmm0, [esp + 12] // seed
|
||||
|
||||
pxor xmm7, xmm7 // constant 0 for unpck
|
||||
pxor xmm7, xmm7 // constant 0 for unpck
|
||||
movdqa xmm6, xmmword ptr kHash16x33
|
||||
|
||||
wloop:
|
||||
movdqu xmm1, [eax] // src[0-15]
|
||||
movdqu xmm1, [eax] // src[0-15]
|
||||
lea eax, [eax + 16]
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
movdqa xmm5, xmmword ptr kHashMul0
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm2, xmm7 // src[0-7]
|
||||
punpcklbw xmm2, xmm7 // src[0-7]
|
||||
movdqa xmm3, xmm2
|
||||
punpcklwd xmm3, xmm7 // src[0-3]
|
||||
punpcklwd xmm3, xmm7 // src[0-3]
|
||||
pmulld xmm3, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul1
|
||||
movdqa xmm4, xmm2
|
||||
punpckhwd xmm4, xmm7 // src[4-7]
|
||||
punpckhwd xmm4, xmm7 // src[4-7]
|
||||
pmulld xmm4, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul2
|
||||
punpckhbw xmm1, xmm7 // src[8-15]
|
||||
punpckhbw xmm1, xmm7 // src[8-15]
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm2, xmm7 // src[8-11]
|
||||
punpcklwd xmm2, xmm7 // src[8-11]
|
||||
pmulld xmm2, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul3
|
||||
punpckhwd xmm1, xmm7 // src[12-15]
|
||||
punpckhwd xmm1, xmm7 // src[12-15]
|
||||
pmulld xmm1, xmm5
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm1, xmm3
|
||||
|
||||
@ -171,18 +171,18 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
sub ecx, 16
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
__declspec(naked)
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
__declspec(naked) uint32
|
||||
HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
vmovd xmm0, [esp + 12] // seed
|
||||
|
||||
wloop:
|
||||
@ -196,7 +196,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
|
||||
lea eax, [eax + 16]
|
||||
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
|
||||
vpaddd xmm3, xmm3, xmm4 // add 16 results
|
||||
vpaddd xmm3, xmm3, xmm4 // add 16 results
|
||||
vpaddd xmm1, xmm1, xmm2
|
||||
vpaddd xmm1, xmm1, xmm3
|
||||
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
@ -207,7 +207,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
sub ecx, 16
|
||||
jg wloop
|
||||
|
||||
vmovd eax, xmm0 // return hash
|
||||
vmovd eax, xmm0 // return hash
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
|
||||
1130
source/convert.cc
1130
source/convert.cc
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -30,87 +30,100 @@ static __inline int Abs(int v) {
|
||||
}
|
||||
|
||||
// I420 To any I4xx YUV format with mirroring.
|
||||
static int I420ToI4xx(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_y_width, int src_y_height,
|
||||
int dst_uv_width, int dst_uv_height) {
|
||||
static int I420ToI4xx(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_y_width,
|
||||
int src_y_height,
|
||||
int dst_uv_width,
|
||||
int dst_uv_height) {
|
||||
const int dst_y_width = Abs(src_y_width);
|
||||
const int dst_y_height = Abs(src_y_height);
|
||||
const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1);
|
||||
const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1);
|
||||
if (src_y_width == 0 || src_y_height == 0 ||
|
||||
dst_uv_width <= 0 || dst_uv_height <= 0) {
|
||||
if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 ||
|
||||
dst_uv_height <= 0) {
|
||||
return -1;
|
||||
}
|
||||
if (dst_y) {
|
||||
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
|
||||
dst_y, dst_stride_y, dst_y_width, dst_y_height,
|
||||
kFilterBilinear);
|
||||
ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y,
|
||||
dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear);
|
||||
}
|
||||
ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
|
||||
dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
|
||||
kFilterBilinear);
|
||||
ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height,
|
||||
dst_v, dst_stride_v, dst_uv_width, dst_uv_height,
|
||||
kFilterBilinear);
|
||||
ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u,
|
||||
dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear);
|
||||
ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v,
|
||||
dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
// 422 chroma is 1/2 width, 1x height
|
||||
LIBYUV_API
|
||||
int I420ToI422(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int I420ToI422(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
const int dst_uv_width = (Abs(width) + 1) >> 1;
|
||||
const int dst_uv_height = Abs(height);
|
||||
return I420ToI4xx(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height,
|
||||
dst_uv_width, dst_uv_height);
|
||||
return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, width, height, dst_uv_width,
|
||||
dst_uv_height);
|
||||
}
|
||||
|
||||
// 420 chroma is 1/2 width, 1/2 height
|
||||
// 444 chroma is 1x width, 1x height
|
||||
LIBYUV_API
|
||||
int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int I420ToI444(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
const int dst_uv_width = Abs(width);
|
||||
const int dst_uv_height = Abs(height);
|
||||
return I420ToI4xx(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height,
|
||||
dst_uv_width, dst_uv_height);
|
||||
return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, width, height, dst_uv_width,
|
||||
dst_uv_height);
|
||||
}
|
||||
|
||||
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height) {
|
||||
if (!src_y || !dst_y ||
|
||||
width <= 0 || height == 0) {
|
||||
int I400Copy(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height) {
|
||||
if (!src_y || !dst_y || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -124,17 +137,21 @@ int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height) {
|
||||
int I422ToYUY2(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_yuy2, int width) =
|
||||
I422ToYUY2Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_yuy2 ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -144,10 +161,8 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
dst_stride_yuy2 = -dst_stride_yuy2;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width &&
|
||||
dst_stride_yuy2 == width * 2) {
|
||||
if (src_stride_y == width && src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
|
||||
@ -180,17 +195,21 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height) {
|
||||
int I420ToYUY2(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_yuy2, int width) =
|
||||
I422ToYUY2Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_yuy2 ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -240,17 +259,21 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height) {
|
||||
int I422ToUYVY(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_uyvy, int width) =
|
||||
I422ToUYVYRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_uyvy ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -260,10 +283,8 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
dst_stride_uyvy = -dst_stride_uyvy;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width &&
|
||||
dst_stride_uyvy == width * 2) {
|
||||
if (src_stride_y == width && src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width && dst_stride_uyvy == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
|
||||
@ -304,17 +325,21 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height) {
|
||||
int I420ToUYVY(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_uyvy, int width) =
|
||||
I422ToUYVYRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_uyvy ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -365,14 +390,20 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// TODO(fbarchard): test negative height for invert.
|
||||
LIBYUV_API
|
||||
int I420ToNV12(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height) {
|
||||
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
|
||||
width <= 0 || height == 0) {
|
||||
int I420ToNV12(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
}
|
||||
int halfwidth = (width + 1) / 2;
|
||||
@ -380,44 +411,47 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
|
||||
if (dst_y) {
|
||||
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
}
|
||||
MergeUVPlane(src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_uv, dst_stride_uv,
|
||||
MergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, dst_stride_uv,
|
||||
halfwidth, halfheight);
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV21(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height) {
|
||||
return I420ToNV12(src_y, src_stride_y,
|
||||
src_v, src_stride_v,
|
||||
src_u, src_stride_u,
|
||||
dst_y, dst_stride_y,
|
||||
dst_vu, dst_stride_vu,
|
||||
int I420ToNV21(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height) {
|
||||
return I420ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u,
|
||||
src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu,
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I422 to RGBA with matrix
|
||||
static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
static int I420ToRGBAMatrix(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToRGBARow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGBARow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgba ||
|
||||
width <= 0 || height == 0) {
|
||||
void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToRGBARow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -482,50 +516,58 @@ static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Convert I420 to RGBA.
|
||||
LIBYUV_API
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgba, dst_stride_rgba,
|
||||
&kYuvI601Constants,
|
||||
width, height);
|
||||
int I420ToRGBA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_rgba, dst_stride_rgba,
|
||||
&kYuvI601Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to BGRA.
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_bgra, dst_stride_bgra,
|
||||
int I420ToBGRA(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height) {
|
||||
return I420ToRGBAMatrix(src_y, src_stride_y, src_v,
|
||||
src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u, dst_bgra, dst_stride_bgra,
|
||||
&kYvuI601Constants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to RGB24 with matrix
|
||||
static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
static int I420ToRGB24Matrix(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width, int height) {
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToRGB24Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGB24Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
|
||||
width <= 0 || height == 0) {
|
||||
void (*I422ToRGB24Row)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToRGB24Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -581,50 +623,59 @@ static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Convert I420 to RGB24.
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_rgb24, dst_stride_rgb24,
|
||||
&kYuvI601Constants,
|
||||
width, height);
|
||||
int I420ToRGB24(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_rgb24, dst_stride_rgb24,
|
||||
&kYuvI601Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to RAW.
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y,
|
||||
src_v, src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u,
|
||||
dst_raw, dst_stride_raw,
|
||||
int I420ToRAW(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height) {
|
||||
return I420ToRGB24Matrix(src_y, src_stride_y, src_v,
|
||||
src_stride_v, // Swap U and V
|
||||
src_u, src_stride_u, dst_raw, dst_stride_raw,
|
||||
&kYvuI601Constants, // Use Yvu matrix
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert I420 to ARGB1555.
|
||||
LIBYUV_API
|
||||
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb1555, int dst_stride_argb1555,
|
||||
int width, int height) {
|
||||
int I420ToARGB1555(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb1555,
|
||||
int dst_stride_argb1555,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToARGB1555Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
void (*I422ToARGB1555Row)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGB1555Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -679,23 +730,25 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Convert I420 to ARGB4444.
|
||||
LIBYUV_API
|
||||
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb4444, int dst_stride_argb4444,
|
||||
int width, int height) {
|
||||
int I420ToARGB4444(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_argb4444,
|
||||
int dst_stride_argb4444,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToARGB4444Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
void (*I422ToARGB4444Row)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGB4444Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -752,20 +805,22 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Convert I420 to RGB565.
|
||||
LIBYUV_API
|
||||
int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height) {
|
||||
int I420ToRGB565(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToRGB565Row)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToRGB565Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
void (*I422ToRGB565Row)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToRGB565Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -821,30 +876,31 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
|
||||
static const uint8 kDither565_4x4[16] = {
|
||||
0, 4, 1, 5,
|
||||
6, 2, 7, 3,
|
||||
1, 5, 0, 4,
|
||||
7, 3, 6, 2,
|
||||
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
|
||||
};
|
||||
|
||||
// Convert I420 to RGB565 with dithering.
|
||||
LIBYUV_API
|
||||
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
const uint8* dither4x4, int width, int height) {
|
||||
int I420ToRGB565Dither(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8* dither4x4,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I422ToARGBRow_C;
|
||||
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
|
||||
width <= 0 || height == 0) {
|
||||
const uint32 dither4, int width) =
|
||||
ARGBToRGB565DitherRow_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -926,7 +982,8 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
|
||||
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width); // NOLINT
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)),
|
||||
width); // NOLINT
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -941,136 +998,98 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
|
||||
// Convert I420 to specified format
|
||||
LIBYUV_API
|
||||
int ConvertFromI420(const uint8* y, int y_stride,
|
||||
const uint8* u, int u_stride,
|
||||
const uint8* v, int v_stride,
|
||||
uint8* dst_sample, int dst_sample_stride,
|
||||
int width, int height,
|
||||
int ConvertFromI420(const uint8* y,
|
||||
int y_stride,
|
||||
const uint8* u,
|
||||
int u_stride,
|
||||
const uint8* v,
|
||||
int v_stride,
|
||||
uint8* dst_sample,
|
||||
int dst_sample_stride,
|
||||
int width,
|
||||
int height,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
int r = 0;
|
||||
if (!y || !u|| !v || !dst_sample ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
r = I420ToYUY2(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
r = I420ToYUY2(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
r = I420ToUYVY(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
r = I420ToUYVY(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
r = I420ToRGB565(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
r = I420ToARGB1555(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
r = I420ToARGB4444(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
r = I420ToARGB4444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 2,
|
||||
width, height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
r = I420ToRGB24(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 3,
|
||||
width, height);
|
||||
r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 3, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
r = I420ToRAW(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 3,
|
||||
width, height);
|
||||
r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 3, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
r = I420ToARGB(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4,
|
||||
width, height);
|
||||
r = I420ToARGB(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
r = I420ToBGRA(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4,
|
||||
width, height);
|
||||
r = I420ToBGRA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
r = I420ToABGR(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4,
|
||||
width, height);
|
||||
r = I420ToABGR(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
r = I420ToRGBA(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4,
|
||||
width, height);
|
||||
r = I420ToRGBA(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width * 4, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
r = I400Copy(y, y_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width,
|
||||
width, height);
|
||||
r = I400Copy(y, y_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width, width,
|
||||
height);
|
||||
break;
|
||||
case FOURCC_NV12: {
|
||||
uint8* dst_uv = dst_sample + width * height;
|
||||
r = I420ToNV12(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width,
|
||||
dst_uv,
|
||||
dst_sample_stride ? dst_sample_stride : width,
|
||||
width, height);
|
||||
r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width, dst_uv,
|
||||
dst_sample_stride ? dst_sample_stride : width, width,
|
||||
height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_NV21: {
|
||||
uint8* dst_vu = dst_sample + width * height;
|
||||
r = I420ToNV21(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width,
|
||||
dst_vu,
|
||||
dst_sample_stride ? dst_sample_stride : width,
|
||||
width, height);
|
||||
r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
|
||||
dst_sample_stride ? dst_sample_stride : width, dst_vu,
|
||||
dst_sample_stride ? dst_sample_stride : width, width,
|
||||
height);
|
||||
break;
|
||||
}
|
||||
// TODO(fbarchard): Add M420.
|
||||
@ -1089,13 +1108,8 @@ int ConvertFromI420(const uint8* y, int y_stride,
|
||||
dst_u = dst_sample + width * height;
|
||||
dst_v = dst_u + halfwidth * halfheight;
|
||||
}
|
||||
r = I420Copy(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample, width,
|
||||
dst_u, halfwidth,
|
||||
dst_v, halfwidth,
|
||||
width, height);
|
||||
r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample, width,
|
||||
dst_u, halfwidth, dst_v, halfwidth, width, height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I422:
|
||||
@ -1110,13 +1124,8 @@ int ConvertFromI420(const uint8* y, int y_stride,
|
||||
dst_u = dst_sample + width * height;
|
||||
dst_v = dst_u + halfwidth * height;
|
||||
}
|
||||
r = I420ToI422(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample, width,
|
||||
dst_u, halfwidth,
|
||||
dst_v, halfwidth,
|
||||
width, height);
|
||||
r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample, width,
|
||||
dst_u, halfwidth, dst_v, halfwidth, width, height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
@ -1130,13 +1139,8 @@ int ConvertFromI420(const uint8* y, int y_stride,
|
||||
dst_u = dst_sample + width * height;
|
||||
dst_v = dst_u + width * height;
|
||||
}
|
||||
r = I420ToI444(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
dst_sample, width,
|
||||
dst_u, width,
|
||||
dst_v, width,
|
||||
width, height);
|
||||
r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample, width,
|
||||
dst_u, width, dst_v, width, width, height);
|
||||
break;
|
||||
}
|
||||
// Formats not supported - MJPG, biplanar, some rgb formats.
|
||||
|
||||
@ -22,16 +22,21 @@ extern "C" {
|
||||
|
||||
// ARGB little endian (bgra in memory) to I444
|
||||
LIBYUV_API
|
||||
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int ARGBToI444(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
int width) = ARGBToUV444Row_C;
|
||||
int width) = ARGBToUV444Row_C;
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -41,20 +46,18 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_y == width &&
|
||||
dst_stride_u == width &&
|
||||
dst_stride_v == width) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_y == width &&
|
||||
dst_stride_u == width && dst_stride_v == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
|
||||
}
|
||||
#if defined(HAS_ARGBTOUV444ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
|
||||
}
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUV444Row = ARGBToUV444Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOUV444ROW_NEON)
|
||||
@ -111,19 +114,22 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// ARGB little endian (bgra in memory) to I422
|
||||
LIBYUV_API
|
||||
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int ARGBToI422(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
|
||||
uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
if (!src_argb ||
|
||||
!dst_y || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -133,10 +139,8 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_y == width &&
|
||||
dst_stride_u * 2 == width &&
|
||||
dst_stride_v * 2 == width) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_y == width &&
|
||||
dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
|
||||
@ -206,21 +210,23 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height) {
|
||||
int ARGBToNV12(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
|
||||
uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
int width) = MergeUVRow_C;
|
||||
if (!src_argb ||
|
||||
!dst_y || !dst_uv ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -331,21 +337,23 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Same as NV12 but U and V swapped.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height) {
|
||||
int ARGBToNV21(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, uint8* dst_u,
|
||||
uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
int width) = MergeUVRow_C;
|
||||
if (!src_argb ||
|
||||
!dst_y || !dst_uv ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -456,19 +464,22 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to YUY2.
|
||||
LIBYUV_API
|
||||
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height) {
|
||||
int ARGBToYUY2(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u,
|
||||
uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
|
||||
const uint8* src_v, uint8* dst_yuy2, int width) =
|
||||
I422ToYUY2Row_C;
|
||||
|
||||
if (!src_argb || !dst_yuy2 ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -478,8 +489,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
dst_stride_yuy2 = -dst_stride_yuy2;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_yuy2 == width * 2) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_yuy2 = 0;
|
||||
@ -582,19 +592,22 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to UYVY.
|
||||
LIBYUV_API
|
||||
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height) {
|
||||
int ARGBToUYVY(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb, uint8* dst_u,
|
||||
uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
|
||||
const uint8* src_v, uint8* dst_uyvy, int width) =
|
||||
I422ToUYVYRow_C;
|
||||
|
||||
if (!src_argb || !dst_uyvy ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_uyvy || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -604,8 +617,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
dst_stride_uyvy = -dst_stride_uyvy;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_uyvy == width * 2) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_uyvy = 0;
|
||||
@ -708,9 +720,12 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to I400.
|
||||
LIBYUV_API
|
||||
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height) {
|
||||
int ARGBToI400(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
|
||||
ARGBToYRow_C;
|
||||
@ -723,8 +738,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_y == width) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_y == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_y = 0;
|
||||
@ -771,26 +785,29 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
|
||||
// Shuffle table for converting ARGB to RGBA.
|
||||
static uvec8 kShuffleMaskARGBToRGBA = {
|
||||
3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u
|
||||
};
|
||||
static uvec8 kShuffleMaskARGBToRGBA = {3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u,
|
||||
11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
|
||||
|
||||
// Convert ARGB to RGBA.
|
||||
LIBYUV_API
|
||||
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_argb, src_stride_argb,
|
||||
dst_rgba, dst_stride_rgba,
|
||||
(const uint8*)(&kShuffleMaskARGBToRGBA),
|
||||
width, height);
|
||||
int ARGBToRGBA(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height) {
|
||||
return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
|
||||
(const uint8*)(&kShuffleMaskARGBToRGBA), width, height);
|
||||
}
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height) {
|
||||
int ARGBToRGB24(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToRGB24Row_C;
|
||||
@ -803,8 +820,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_rgb24 == width * 3) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_rgb24 = 0;
|
||||
@ -836,9 +852,12 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
LIBYUV_API
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_raw, int dst_stride_raw,
|
||||
int width, int height) {
|
||||
int ARGBToRAW(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToRAWRow_C;
|
||||
@ -851,8 +870,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_raw == width * 3) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_raw == width * 3) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_raw = 0;
|
||||
@ -884,20 +902,22 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
|
||||
static const uint8 kDither565_4x4[16] = {
|
||||
0, 4, 1, 5,
|
||||
6, 2, 7, 3,
|
||||
1, 5, 0, 4,
|
||||
7, 3, 6, 2,
|
||||
0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2,
|
||||
};
|
||||
|
||||
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
const uint8* dither4x4, int width, int height) {
|
||||
int ARGBToRGB565Dither(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8* dither4x4,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
|
||||
const uint32 dither4, int width) =
|
||||
ARGBToRGB565DitherRow_C;
|
||||
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -935,7 +955,8 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)), width); /* NOLINT */
|
||||
*(uint32*)(dither4x4 + ((y & 3) << 2)),
|
||||
width); /* NOLINT */
|
||||
src_argb += src_stride_argb;
|
||||
dst_rgb565 += dst_stride_rgb565;
|
||||
}
|
||||
@ -945,9 +966,12 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
// Convert ARGB To RGB565.
|
||||
// TODO(fbarchard): Consider using dither function low level with zeros.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height) {
|
||||
int ARGBToRGB565(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToRGB565Row_C;
|
||||
@ -960,8 +984,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_rgb565 == width * 2) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_rgb565 = 0;
|
||||
@ -1001,9 +1024,12 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB To ARGB1555.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb1555, int dst_stride_argb1555,
|
||||
int width, int height) {
|
||||
int ARGBToARGB1555(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb1555,
|
||||
int dst_stride_argb1555,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToARGB1555Row_C;
|
||||
@ -1016,8 +1042,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_argb1555 == width * 2) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_argb1555 = 0;
|
||||
@ -1057,9 +1082,12 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB To ARGB4444.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb4444, int dst_stride_argb4444,
|
||||
int width, int height) {
|
||||
int ARGBToARGB4444(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb4444,
|
||||
int dst_stride_argb4444,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToARGB4444Row_C;
|
||||
@ -1072,8 +1100,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_argb4444 == width * 2) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_argb4444 = 0;
|
||||
@ -1113,19 +1140,22 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to J420. (JPEG full range I420).
|
||||
LIBYUV_API
|
||||
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int ARGBToJ420(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
|
||||
ARGBToYJRow_C;
|
||||
if (!src_argb ||
|
||||
!dst_yj || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -1187,19 +1217,22 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to J422. (JPEG full range I422).
|
||||
LIBYUV_API
|
||||
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height) {
|
||||
int ARGBToJ422(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
|
||||
ARGBToYJRow_C;
|
||||
if (!src_argb ||
|
||||
!dst_yj || !dst_u || !dst_v ||
|
||||
width <= 0 || height == 0) {
|
||||
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
@ -1209,10 +1242,8 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_yj == width &&
|
||||
dst_stride_u * 2 == width &&
|
||||
dst_stride_v * 2 == width) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_yj == width &&
|
||||
dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
|
||||
@ -1265,9 +1296,12 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
|
||||
// Convert ARGB to J400.
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
int width, int height) {
|
||||
int ARGBToJ400(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_yj,
|
||||
int dst_stride_yj,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
|
||||
ARGBToYJRow_C;
|
||||
@ -1280,8 +1314,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb == width * 4 &&
|
||||
dst_stride_yj == width) {
|
||||
if (src_stride_argb == width * 4 && dst_stride_yj == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb = dst_stride_yj = 0;
|
||||
|
||||
@ -37,13 +37,9 @@ static void JpegCopyI420(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I420Copy(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@ -55,13 +51,9 @@ static void JpegI422ToI420(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I422ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@ -73,13 +65,9 @@ static void JpegI444ToI420(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I444ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@ -91,11 +79,8 @@ static void JpegI400ToI420(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I400ToI420(data[0], strides[0],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u,
|
||||
dest->u_stride, dest->v, dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@ -104,8 +89,7 @@ static void JpegI400ToI420(void* opaque,
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height) {
|
||||
int MJPGSize(const uint8* sample, size_t sample_size, int* width, int* height) {
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret) {
|
||||
@ -121,11 +105,16 @@ int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
uint8* y,
|
||||
int y_stride,
|
||||
uint8* u,
|
||||
int u_stride,
|
||||
uint8* v,
|
||||
int v_stride,
|
||||
int w,
|
||||
int h,
|
||||
int dw,
|
||||
int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
@ -134,17 +123,16 @@ int MJPGToI420(const uint8* sample,
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
if (ret &&
|
||||
(mjpeg_decoder.GetWidth() != w || mjpeg_decoder.GetHeight() != h)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
|
||||
I420Buffers bufs = {y, y_stride, u, u_stride, v, v_stride, dw, dh};
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
@ -153,7 +141,7 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
|
||||
// YUV422
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@ -164,7 +152,7 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
|
||||
// YUV444
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@ -175,7 +163,7 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
|
||||
// YUV400
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
@ -202,15 +190,12 @@ struct ARGBBuffers {
|
||||
};
|
||||
|
||||
static void JpegI420ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I420ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
@ -220,11 +205,8 @@ static void JpegI422ToARGB(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I422ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
@ -234,11 +216,8 @@ static void JpegI444ToARGB(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I444ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
@ -248,9 +227,7 @@ static void JpegI400ToARGB(void* opaque,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I400ToARGB(data[0], strides[0],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
@ -260,9 +237,12 @@ static void JpegI400ToARGB(void* opaque,
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* argb, int argb_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
uint8* argb,
|
||||
int argb_stride,
|
||||
int w,
|
||||
int h,
|
||||
int dw,
|
||||
int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
@ -271,17 +251,16 @@ int MJPGToARGB(const uint8* sample,
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
if (ret &&
|
||||
(mjpeg_decoder.GetWidth() != w || mjpeg_decoder.GetHeight() != h)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
ARGBBuffers bufs = { argb, argb_stride, dw, dh };
|
||||
ARGBBuffers bufs = {argb, argb_stride, dw, dh};
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
@ -290,7 +269,7 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
|
||||
// YUV422
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@ -301,7 +280,7 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
|
||||
// YUV444
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@ -312,7 +291,7 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
|
||||
// YUV400
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
|
||||
@ -29,11 +29,16 @@ extern "C" {
|
||||
// sample_size is measured in bytes and is the size of the frame.
|
||||
// With MJPEG it is the compressed size of the frame.
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* crop_argb, int argb_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToARGB(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* crop_argb,
|
||||
int argb_stride,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
@ -49,16 +54,15 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
// and then rotate the I420 to the final destination buffer.
|
||||
// For in-place conversion, if destination crop_argb is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
|
||||
crop_argb == sample;
|
||||
LIBYUV_BOOL need_buf =
|
||||
(rotation && format != FOURCC_ARGB) || crop_argb == sample;
|
||||
uint8* dest_argb = crop_argb;
|
||||
int dest_argb_stride = argb_stride;
|
||||
uint8* rotate_buffer = NULL;
|
||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
|
||||
if (crop_argb == NULL || sample == NULL ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
if (crop_argb == NULL || sample == NULL || src_width <= 0 ||
|
||||
crop_width <= 0 || src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
if (src_height < 0) {
|
||||
@ -67,7 +71,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
|
||||
if (need_buf) {
|
||||
int argb_size = crop_width * 4 * abs_crop_height;
|
||||
rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */
|
||||
rotate_buffer = (uint8*)malloc(argb_size); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
@ -79,100 +83,83 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2, crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = UYVYToARGB(src, aligned_src_width * 2, crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB24ToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RAWToARGB(src, src_width * 3, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGBToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = BGRAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ABGRToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGBAToARGB(src, src_width * 4, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB565ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB1555ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB4444ToARGB(src, src_width * 2, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I400ToARGB(src, src_width, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb,
|
||||
argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV21ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, crop_argb,
|
||||
argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = M420ToARGB(src, src_width, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
@ -184,20 +171,17 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
crop_argb, argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -208,14 +192,11 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = J420ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
crop_argb, argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -226,21 +207,18 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
crop_argb, argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
@ -255,18 +233,14 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToARGB(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
|
||||
crop_argb, argb_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToARGB(sample, sample_size,
|
||||
crop_argb, argb_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
r = MJPGToARGB(sample, sample_size, crop_argb, argb_stride, src_width,
|
||||
abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@ -275,8 +249,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = ARGBRotate(crop_argb, argb_stride,
|
||||
dest_argb, dest_argb_stride,
|
||||
r = ARGBRotate(crop_argb, argb_stride, dest_argb, dest_argb_stride,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
|
||||
@ -27,12 +27,18 @@ extern "C" {
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
uint8* y,
|
||||
int y_stride,
|
||||
uint8* u,
|
||||
int u_stride,
|
||||
uint8* v,
|
||||
int v_stride,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
@ -43,9 +49,10 @@ int ConvertToI420(const uint8* sample,
|
||||
// TODO(nisse): Why allow crop_height < 0?
|
||||
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
|
||||
format != FOURCC_NV12 && format != FOURCC_NV21 &&
|
||||
format != FOURCC_YV12) || y == sample;
|
||||
LIBYUV_BOOL need_buf =
|
||||
(rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
|
||||
format != FOURCC_NV21 && format != FOURCC_YV12) ||
|
||||
y == sample;
|
||||
uint8* tmp_y = y;
|
||||
uint8* tmp_u = u;
|
||||
uint8* tmp_v = v;
|
||||
@ -56,8 +63,7 @@ int ConvertToI420(const uint8* sample,
|
||||
const int inv_crop_height =
|
||||
(src_height < 0) ? -abs_crop_height : abs_crop_height;
|
||||
|
||||
if (!y || !u || !v || !sample ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
if (!y || !u || !v || !sample || src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -70,7 +76,7 @@ int ConvertToI420(const uint8* sample,
|
||||
if (need_buf) {
|
||||
int y_size = crop_width * abs_crop_height;
|
||||
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); /* NOLINT */
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
@ -85,130 +91,85 @@ int ConvertToI420(const uint8* sample,
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = YUY2ToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v,
|
||||
v_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = UYVYToI420(src, aligned_src_width * 2, y, y_stride, u, u_stride, v,
|
||||
v_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB565ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
|
||||
v_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB1555ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
|
||||
v_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB4444ToI420(src, src_width * 2, y, y_stride, u, u_stride, v,
|
||||
v_stride, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = RGB24ToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = RAWToI420(src, src_width * 3, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = ARGBToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = BGRAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = ABGRToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = RGBAToI420(src, src_width * 4, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = I400ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + (src_width * src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y,
|
||||
y_stride, u, u_stride, v, v_stride, crop_width,
|
||||
inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + (src_width * src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
v, v_stride,
|
||||
u, u_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, y,
|
||||
y_stride, v, v_stride, u, u_stride, crop_width,
|
||||
inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = M420ToI420(src, src_width, y, y_stride, u, u_stride, v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// Triplanar formats
|
||||
@ -221,22 +182,18 @@ int ConvertToI420(const uint8* sample,
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420Rotate(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y,
|
||||
y_stride, u, u_stride, v, v_stride, crop_width,
|
||||
inv_crop_height, rotation);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I422:
|
||||
@ -246,23 +203,19 @@ int ConvertToI420(const uint8* sample,
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToI420(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, y,
|
||||
y_stride, u, u_stride, v, v_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
@ -277,21 +230,14 @@ int ConvertToI420(const uint8* sample,
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToI420(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, y,
|
||||
y_stride, u, u_stride, v, v_stride, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToI420(sample, sample_size,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
r = MJPGToI420(sample, sample_size, y, y_stride, u, u_stride, v, v_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
@ -301,13 +247,9 @@ int ConvertToI420(const uint8* sample,
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = I420Rotate(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
tmp_y, tmp_y_stride,
|
||||
tmp_u, tmp_u_stride,
|
||||
tmp_v, tmp_v_stride,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
r = I420Rotate(y, y_stride, u, u_stride, v, v_stride, tmp_y, tmp_y_stride,
|
||||
tmp_u, tmp_u_stride, tmp_v, tmp_v_stride, crop_width,
|
||||
abs_crop_height, rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h> // For __cpuidex()
|
||||
#endif
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
|
||||
defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
#include <immintrin.h> // For _xgetbv()
|
||||
@ -44,8 +44,8 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Low level cpuid for X86.
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(__x86_64__)) && \
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(__x86_64__)) && \
|
||||
!defined(__pnacl__) && !defined(__CLR_VER)
|
||||
LIBYUV_API
|
||||
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
@ -74,18 +74,18 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
// GCC version uses inline x86 assembly.
|
||||
#else // defined(_MSC_VER)
|
||||
uint32 info_ebx, info_edx;
|
||||
asm volatile (
|
||||
#if defined( __i386__) && defined(__PIC__)
|
||||
// Preserve ebx for fpic 32 bit.
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=D" (info_ebx),
|
||||
asm volatile(
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
// Preserve ebx for fpic 32 bit.
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=D"(info_ebx),
|
||||
#else
|
||||
"cpuid \n"
|
||||
: "=b" (info_ebx),
|
||||
"cpuid \n"
|
||||
: "=b"(info_ebx),
|
||||
#endif // defined( __i386__) && defined(__PIC__)
|
||||
"+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
|
||||
"+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
|
||||
cpu_info[0] = info_eax;
|
||||
cpu_info[1] = info_ebx;
|
||||
cpu_info[2] = info_ecx;
|
||||
@ -111,8 +111,8 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
|
||||
#if defined(_M_IX86) && (_MSC_VER < 1900)
|
||||
#pragma optimize("g", off)
|
||||
#endif
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(__x86_64__)) && \
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(__x86_64__)) && \
|
||||
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
|
||||
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
|
||||
int GetXCR0() {
|
||||
@ -120,7 +120,7 @@ int GetXCR0() {
|
||||
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
|
||||
asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
|
||||
#endif // defined(__i386__) || defined(__x86_64__)
|
||||
return xcr0;
|
||||
}
|
||||
@ -135,8 +135,7 @@ int GetXCR0() {
|
||||
|
||||
// based on libvpx arm_cpudetect.c
|
||||
// For Arm, but public to allow testing on any CPU
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
char cpuinfo_line[512];
|
||||
FILE* f = fopen(cpuinfo_name, "r");
|
||||
if (!f) {
|
||||
@ -163,8 +162,8 @@ int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
|
||||
LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
|
||||
const char ase[]) {
|
||||
char cpuinfo_line[512];
|
||||
int len = (int)strlen(ase);
|
||||
FILE* f = fopen(cpuinfo_name, "r");
|
||||
@ -218,20 +217,18 @@ static LIBYUV_BOOL TestEnv(const char*) {
|
||||
}
|
||||
#endif
|
||||
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int InitCpuFlags(void) {
|
||||
LIBYUV_API SAFEBUFFERS int InitCpuFlags(void) {
|
||||
int cpu_info = 0;
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
|
||||
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info0[4] = {0, 0, 0, 0};
|
||||
uint32 cpu_info1[4] = {0, 0, 0, 0};
|
||||
uint32 cpu_info7[4] = {0, 0, 0, 0};
|
||||
CpuId(0, 0, cpu_info0);
|
||||
CpuId(1, 0, cpu_info1);
|
||||
if (cpu_info0[0] >= 7) {
|
||||
CpuId(7, 0, cpu_info7);
|
||||
}
|
||||
cpu_info = kCpuHasX86 |
|
||||
((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
||||
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
|
||||
@ -240,8 +237,7 @@ int InitCpuFlags(void) {
|
||||
// AVX requires OS saves YMM registers.
|
||||
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
|
||||
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
|
||||
cpu_info |= kCpuHasAVX |
|
||||
((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
|
||||
cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
|
||||
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
|
||||
((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
|
||||
|
||||
@ -326,7 +322,7 @@ int InitCpuFlags(void) {
|
||||
if (TestEnv("LIBYUV_DISABLE_ASM")) {
|
||||
cpu_info = 0;
|
||||
}
|
||||
cpu_info |= kCpuInitialized;
|
||||
cpu_info |= kCpuInitialized;
|
||||
cpu_info_ = cpu_info;
|
||||
return cpu_info;
|
||||
}
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// disable warning 4324: structure was padded due to __declspec(align())
|
||||
#pragma warning(disable:4324)
|
||||
#pragma warning(disable : 4324)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -129,7 +129,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
if (scanlines_[i]) {
|
||||
delete scanlines_[i];
|
||||
}
|
||||
scanlines_[i] = new uint8* [scanlines_size];
|
||||
scanlines_[i] = new uint8*[scanlines_size];
|
||||
scanlines_sizes_[i] = scanlines_size;
|
||||
}
|
||||
|
||||
@ -195,13 +195,11 @@ int MJpegDecoder::GetVertSampFactor(int component) {
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetHorizSubSampFactor(int component) {
|
||||
return decompress_struct_->max_h_samp_factor /
|
||||
GetHorizSampFactor(component);
|
||||
return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSubSampFactor(int component) {
|
||||
return decompress_struct_->max_v_samp_factor /
|
||||
GetVertSampFactor(component);
|
||||
return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
|
||||
@ -245,10 +243,10 @@ LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
uint8** planes, int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8** planes,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
@ -289,14 +287,13 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip =
|
||||
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
|
||||
rows_to_skip;
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy =
|
||||
GetComponentScanlinesPerImcuRow(i) - rows_to_skip;
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i),
|
||||
scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
|
||||
@ -305,16 +302,15 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
|
||||
// Read full MCUs but cropped horizontally
|
||||
for (; lines_left > GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
@ -328,19 +324,19 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy =
|
||||
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn,
|
||||
void* opaque,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
@ -395,7 +391,7 @@ LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
}
|
||||
// Read full MCUs until we get to the crop point.
|
||||
for (; lines_left >= GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
@ -440,17 +436,17 @@ void term_source(j_decompress_ptr cinfo) {
|
||||
|
||||
#ifdef HAVE_SETJMP
|
||||
void ErrorHandler(j_common_ptr cinfo) {
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
#ifdef DEBUG
|
||||
char buf[JMSG_LENGTH_MAX];
|
||||
(*cinfo->err->format_message)(cinfo, buf);
|
||||
// ERROR: Error in jpeglib: buf
|
||||
// ERROR: Error in jpeglib: buf
|
||||
#endif
|
||||
|
||||
SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
|
||||
@ -472,9 +468,9 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
// it.
|
||||
DestroyOutputBuffers();
|
||||
|
||||
scanlines_ = new uint8** [num_outbufs];
|
||||
scanlines_ = new uint8**[num_outbufs];
|
||||
scanlines_sizes_ = new int[num_outbufs];
|
||||
databuf_ = new uint8* [num_outbufs];
|
||||
databuf_ = new uint8*[num_outbufs];
|
||||
databuf_strides_ = new int[num_outbufs];
|
||||
|
||||
for (int i = 0; i < num_outbufs; ++i) {
|
||||
@ -490,13 +486,13 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
|
||||
void MJpegDecoder::DestroyOutputBuffers() {
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
delete [] scanlines_[i];
|
||||
delete [] databuf_[i];
|
||||
delete[] scanlines_[i];
|
||||
delete[] databuf_[i];
|
||||
}
|
||||
delete [] scanlines_;
|
||||
delete [] databuf_;
|
||||
delete [] scanlines_sizes_;
|
||||
delete [] databuf_strides_;
|
||||
delete[] scanlines_;
|
||||
delete[] databuf_;
|
||||
delete[] scanlines_sizes_;
|
||||
delete[] databuf_strides_;
|
||||
scanlines_ = NULL;
|
||||
databuf_ = NULL;
|
||||
scanlines_sizes_ = NULL;
|
||||
@ -542,26 +538,26 @@ void MJpegDecoder::SetScanlinePointers(uint8** data) {
|
||||
|
||||
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
|
||||
return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
|
||||
jpeg_read_raw_data(decompress_struct_,
|
||||
scanlines_,
|
||||
GetImageScanlinesPerImcuRow());
|
||||
jpeg_read_raw_data(decompress_struct_, scanlines_,
|
||||
GetImageScanlinesPerImcuRow());
|
||||
}
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components) {
|
||||
int* subsample_x,
|
||||
int* subsample_y,
|
||||
int number_of_components) {
|
||||
if (number_of_components == 3) { // Color images.
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 2 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 2) {
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
|
||||
subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) {
|
||||
return kJpegYuv420;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 1) {
|
||||
subsample_x[1] == 2 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 1) {
|
||||
return kJpegYuv422;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 1 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 1 && subsample_y[2] == 1) {
|
||||
subsample_x[1] == 1 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 1 && subsample_y[2] == 1) {
|
||||
return kJpegYuv444;
|
||||
}
|
||||
} else if (number_of_components == 1) { // Grey-scale images.
|
||||
@ -574,4 +570,3 @@ JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
|
||||
|
||||
} // namespace libyuv
|
||||
#endif // HAVE_JPEG
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
|
||||
const uint8* it = sample;
|
||||
while (it < end) {
|
||||
// TODO(fbarchard): scan for 0xd9 instead.
|
||||
it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
|
||||
it = static_cast<const uint8*>(memchr(it, 0xff, end - it));
|
||||
if (it == NULL) {
|
||||
break;
|
||||
}
|
||||
@ -68,4 +68,3 @@ LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
304
source/rotate.cc
304
source/rotate.cc
@ -22,12 +22,15 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_API
|
||||
void TransposePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void TransposePlane(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i = height;
|
||||
void (*TransposeWx8)(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) = TransposeWx8_C;
|
||||
void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst,
|
||||
int dst_stride, int width) = TransposeWx8_C;
|
||||
#if defined(HAS_TRANSPOSEWX8_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
TransposeWx8 = TransposeWx8_NEON;
|
||||
@ -51,8 +54,8 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEWX8_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
if (IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&
|
||||
IS_ALIGNED(src_stride, 4)) {
|
||||
TransposeWx8 = TransposeWx8_Fast_DSPR2;
|
||||
} else {
|
||||
TransposeWx8 = TransposeWx8_DSPR2;
|
||||
@ -63,8 +66,8 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
// Work across the source in 8x8 tiles
|
||||
while (i >= 8) {
|
||||
TransposeWx8(src, src_stride, dst, dst_stride, width);
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst += 8; // Move over 8 columns.
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst += 8; // Move over 8 columns.
|
||||
i -= 8;
|
||||
}
|
||||
|
||||
@ -74,9 +77,12 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane90(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 90 is a transpose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
@ -86,9 +92,12 @@ void RotatePlane90(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane270(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 270 is a transpose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
@ -98,9 +107,12 @@ void RotatePlane270(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane180(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width);
|
||||
const uint8* src_bot = src + src_stride * (height - 1);
|
||||
@ -135,9 +147,9 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
#endif
|
||||
// TODO(fbarchard): Mirror on mips handle unaligned memory.
|
||||
#if defined(HAS_MIRRORROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
|
||||
IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
|
||||
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst, 4) &&
|
||||
IS_ALIGNED(dst_stride, 4)) {
|
||||
MirrorRow = MirrorRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
@ -147,7 +159,7 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
if (IS_ALIGNED(width, 64)) {
|
||||
MirrorRow = MirrorRow_MSA;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
@ -189,14 +201,17 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void TransposeUV(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void TransposeUV(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i = height;
|
||||
void (*TransposeUVWx8)(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a,
|
||||
int dst_stride_a, uint8* dst_b, int dst_stride_b,
|
||||
int width) = TransposeUVWx8_C;
|
||||
#if defined(HAS_TRANSPOSEUVWX8_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
@ -212,68 +227,72 @@ void TransposeUV(const uint8* src, int src_stride,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&
|
||||
IS_ALIGNED(src_stride, 4)) {
|
||||
TransposeUVWx8 = TransposeUVWx8_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Work through the source in 8x8 tiles.
|
||||
while (i >= 8) {
|
||||
TransposeUVWx8(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
|
||||
width);
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst_a += 8; // Move over 8 columns.
|
||||
dst_b += 8; // Move over 8 columns.
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst_a += 8; // Move over 8 columns.
|
||||
dst_b += 8; // Move over 8 columns.
|
||||
i -= 8;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
TransposeUVWxH_C(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
|
||||
width, i);
|
||||
}
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV90(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV90(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
src += src_stride * (height - 1);
|
||||
src_stride = -src_stride;
|
||||
|
||||
TransposeUV(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
width, height);
|
||||
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
|
||||
height);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV270(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV270(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
dst_a += dst_stride_a * (width - 1);
|
||||
dst_b += dst_stride_b * (width - 1);
|
||||
dst_stride_a = -dst_stride_a;
|
||||
dst_stride_b = -dst_stride_b;
|
||||
|
||||
TransposeUV(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
width, height);
|
||||
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
|
||||
height);
|
||||
}
|
||||
|
||||
// Rotate 180 is a horizontal and vertical flip.
|
||||
LIBYUV_API
|
||||
void RotateUV180(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV180(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
|
||||
MirrorUVRow_C;
|
||||
@ -288,8 +307,8 @@ void RotateUV180(const uint8* src, int src_stride,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src, 4) &&
|
||||
IS_ALIGNED(src_stride, 4)) {
|
||||
MirrorUVRow = MirrorUVRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
@ -306,9 +325,12 @@ void RotateUV180(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int RotatePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height,
|
||||
int RotatePlane(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src || width <= 0 || height == 0 || !dst) {
|
||||
return -1;
|
||||
@ -324,24 +346,16 @@ int RotatePlane(const uint8* src, int src_stride,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
CopyPlane(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
CopyPlane(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate90:
|
||||
RotatePlane90(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane90(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane270(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane180(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
@ -350,18 +364,25 @@ int RotatePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
int I420Rotate(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
|
||||
!dst_y || !dst_u || !dst_v) {
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
|
||||
!dst_u || !dst_v) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -380,45 +401,29 @@ int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return I420Copy(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height);
|
||||
return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, width, height);
|
||||
case kRotate90:
|
||||
RotatePlane90(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane90(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane270(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane180(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
@ -427,17 +432,23 @@ int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
int NV12ToI420Rotate(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_uv || width <= 0 || height == 0 ||
|
||||
!dst_y || !dst_u || !dst_v) {
|
||||
if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u ||
|
||||
!dst_v) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -454,38 +465,23 @@ int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return NV12ToI420(src_y, src_stride_y,
|
||||
src_uv, src_stride_uv,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
return NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
|
||||
dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
|
||||
width, height);
|
||||
case kRotate90:
|
||||
RotatePlane90(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV90(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV270(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV180(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
|
||||
@ -18,16 +18,16 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8* src, int src_stride, \
|
||||
uint8* dst, int dst_stride, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
|
||||
} \
|
||||
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\
|
||||
}
|
||||
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8* src, int src_stride, uint8* dst, int dst_stride, \
|
||||
int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
|
||||
} \
|
||||
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_TRANSPOSEWX8_NEON
|
||||
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
|
||||
@ -44,19 +44,16 @@ TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
|
||||
#undef TANY
|
||||
|
||||
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8* src, int src_stride, \
|
||||
uint8* dst_a, int dst_stride_a, \
|
||||
uint8* dst_b, int dst_stride_b, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \
|
||||
n); \
|
||||
} \
|
||||
TransposeUVWx8_C(src + n * 2, src_stride, \
|
||||
dst_a + n * dst_stride_a, dst_stride_a, \
|
||||
dst_b + n * dst_stride_b, dst_stride_b, r); \
|
||||
}
|
||||
void NAMEANY(const uint8* src, int src_stride, uint8* dst_a, \
|
||||
int dst_stride_a, uint8* dst_b, int dst_stride_b, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
|
||||
} \
|
||||
TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \
|
||||
dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_TRANSPOSEUVWX8_NEON
|
||||
TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
|
||||
@ -73,8 +70,3 @@ TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -22,29 +22,44 @@ extern "C" {
|
||||
|
||||
// ARGBScale has a function to copy pixels to a row, striding each source
|
||||
// pixel by a constant.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) && !defined(__native_client__)) || \
|
||||
defined(__i386__))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr,
|
||||
int src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr,
|
||||
int src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
#endif
|
||||
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_ptr,
|
||||
int,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr,
|
||||
int dst_width);
|
||||
|
||||
static void ARGBTranspose(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
static void ARGBTranspose(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
int src_pixel_step = src_stride >> 2;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
|
||||
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
|
||||
int src_step, uint8* dst_ptr, int dst_width) =
|
||||
ScaleARGBRowDownEven_C;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
|
||||
@ -63,8 +78,12 @@ static void ARGBTranspose(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBRotate90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate90(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 90 is a ARGBTranspose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
@ -73,8 +92,12 @@ void ARGBRotate90(const uint8* src, int src_stride,
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
void ARGBRotate270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate270(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 270 is a ARGBTranspose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
@ -83,8 +106,12 @@ void ARGBRotate270(const uint8* src, int src_stride,
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
void ARGBRotate180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate180(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width * 4);
|
||||
const uint8* src_bot = src + src_stride * (height - 1);
|
||||
@ -154,9 +181,9 @@ void ARGBRotate180(const uint8* src, int src_stride,
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
|
||||
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
|
||||
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
|
||||
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
src_bot -= src_stride;
|
||||
@ -166,8 +193,12 @@ void ARGBRotate180(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb, int width, int height,
|
||||
int ARGBRotate(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
|
||||
return -1;
|
||||
@ -183,23 +214,19 @@ int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return ARGBCopy(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
case kRotate90:
|
||||
ARGBRotate90(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
ARGBRotate270(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate270(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
ARGBRotate180(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate180(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
|
||||
@ -16,8 +16,11 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
void TransposeWx8_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst[0] = src[0 * src_stride];
|
||||
@ -33,9 +36,13 @@ void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width) {
|
||||
void TransposeUVWx8_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst_a[0] = src[0 * src_stride + 0];
|
||||
@ -60,9 +67,12 @@ void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void TransposeWxH_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int j;
|
||||
@ -72,10 +82,14 @@ void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void TransposeUVWxH_C(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
for (i = 0; i < width * 2; i += 2) {
|
||||
int j;
|
||||
|
||||
@ -22,342 +22,348 @@ extern "C" {
|
||||
|
||||
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
|
||||
#if defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"movq (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"movq (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movq (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"movq (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movq (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"lea 0x8(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
void TransposeWx8_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"movq (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"movq (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movq (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"movq (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movq (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"lea 0x8(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
|
||||
// Transpose 16x8. 64 bit
|
||||
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm9 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm9,%%xmm9 \n"
|
||||
"movdqu (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm10 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm10 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movdqa %%xmm10,%%xmm11 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"movdqu (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm12 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm12 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movdqa %%xmm12,%%xmm13 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movdqu (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm14 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"punpckhbw %%xmm7,%%xmm14 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"movdqa %%xmm14,%%xmm15 \n"
|
||||
"lea 0x10(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"punpcklwd %%xmm10,%%xmm8 \n"
|
||||
"punpcklwd %%xmm11,%%xmm9 \n"
|
||||
"movdqa %%xmm8,%%xmm10 \n"
|
||||
"movdqa %%xmm9,%%xmm11 \n"
|
||||
"palignr $0x8,%%xmm10,%%xmm10 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"punpcklwd %%xmm14,%%xmm12 \n"
|
||||
"punpcklwd %%xmm15,%%xmm13 \n"
|
||||
"movdqa %%xmm12,%%xmm14 \n"
|
||||
"movdqa %%xmm13,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm12,%%xmm8 \n"
|
||||
"movq %%xmm8,(%1) \n"
|
||||
"movdqa %%xmm8,%%xmm12 \n"
|
||||
"palignr $0x8,%%xmm12,%%xmm12 \n"
|
||||
"movq %%xmm12,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm14,%%xmm10 \n"
|
||||
"movdqa %%xmm10,%%xmm14 \n"
|
||||
"movq %%xmm10,(%1) \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"punpckldq %%xmm13,%%xmm9 \n"
|
||||
"movq %%xmm14,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm9,%%xmm13 \n"
|
||||
"movq %%xmm9,(%1) \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movq %%xmm13,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm15,%%xmm11 \n"
|
||||
"movq %%xmm11,(%1) \n"
|
||||
"movdqa %%xmm11,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movq %%xmm15,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
|
||||
);
|
||||
// Transposes a band of 8 source rows into destination rows, 16 columns per
// loop iteration, using SSE2 unpack instructions plus SSSE3 palignr.
//
// Per iteration the asm below:
//   - loads 16 bytes from each of 8 consecutive source rows (pitch src_stride),
//   - interleaves them in three rounds (bytes, then 16-bit words, then 32-bit
//     dwords),
//   - stores 16 destination rows of 8 bytes each (pitch dst_stride),
//   - steps src forward by 16 bytes and dst forward by 16 rows.
// width is decremented by 16 per iteration and the loop repeats while the
// result is greater than zero, so the body always runs at least once.
// src, dst and width are read-write asm operands; xmm0-xmm15 are clobbered.
void TransposeWx8_Fast_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap: interleave the bytes of each pair of adjacent
// rows. punpcklbw keeps the low 8 columns, punpckhbw (on a copy) the high 8.
// Each palignr $0x8 on a register with itself rotates it by 8 bytes, moving
// its upper 64 bits into the low half.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm9 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm9,%%xmm9 \n"
|
||||
"movdqu (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm10 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm10 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movdqa %%xmm10,%%xmm11 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"movdqu (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm12 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm12 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movdqa %%xmm12,%%xmm13 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movdqu (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm14 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"punpckhbw %%xmm7,%%xmm14 \n"
|
||||
// src has advanced 8 rows. The stride is negated temporarily so that the lea
// below can rewind src by 8 rows and step it 16 bytes to the right in one
// instruction; the second neg restores the stride.
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"movdqa %%xmm14,%%xmm15 \n"
|
||||
"lea 0x10(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap: interleave 16-bit words (punpcklwd).
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"punpcklwd %%xmm10,%%xmm8 \n"
|
||||
"punpcklwd %%xmm11,%%xmm9 \n"
|
||||
"movdqa %%xmm8,%%xmm10 \n"
|
||||
"movdqa %%xmm9,%%xmm11 \n"
|
||||
"palignr $0x8,%%xmm10,%%xmm10 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"punpcklwd %%xmm14,%%xmm12 \n"
|
||||
"punpcklwd %%xmm15,%%xmm13 \n"
|
||||
"movdqa %%xmm12,%%xmm14 \n"
|
||||
"movdqa %%xmm13,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// Third round of bit swap: interleave 32-bit dwords (punpckldq).
|
||||
// Write to the destination pointer.
// Each movq stores one 8-byte destination row; dst advances two rows per lea.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm12,%%xmm8 \n"
|
||||
"movq %%xmm8,(%1) \n"
|
||||
"movdqa %%xmm8,%%xmm12 \n"
|
||||
"palignr $0x8,%%xmm12,%%xmm12 \n"
|
||||
"movq %%xmm12,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm14,%%xmm10 \n"
|
||||
"movdqa %%xmm10,%%xmm14 \n"
|
||||
"movq %%xmm10,(%1) \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"punpckldq %%xmm13,%%xmm9 \n"
|
||||
"movq %%xmm14,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm9,%%xmm13 \n"
|
||||
"movq %%xmm9,(%1) \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movq %%xmm13,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm15,%%xmm11 \n"
|
||||
"movq %%xmm11,(%1) \n"
|
||||
"movdqa %%xmm11,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// 16 columns consumed. The movq and lea that follow do not modify the flags,
// so the jg at the end of the loop tests the result of this sub.
"sub $0x10,%2 \n"
|
||||
"movq %%xmm15,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
|
||||
"xmm15");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
|
||||
// Transpose UV 8x8. 64 bit.
|
||||
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%4),%%xmm1 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqu (%0,%4),%%xmm3 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm3 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"movdqu (%0,%4),%%xmm5 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm5 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"movdqu (%0,%4),%%xmm7 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm8 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %4 \n"
|
||||
"lea 0x10(%0,%4,8),%0 \n"
|
||||
"punpckhbw %%xmm7,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm7 \n"
|
||||
"neg %4 \n"
|
||||
// Second round of bit swap.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"movdqa %%xmm1,%%xmm9 \n"
|
||||
"punpckhwd %%xmm2,%%xmm8 \n"
|
||||
"punpckhwd %%xmm3,%%xmm9 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm2 \n"
|
||||
"movdqa %%xmm9,%%xmm3 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"movdqa %%xmm5,%%xmm9 \n"
|
||||
"punpckhwd %%xmm6,%%xmm8 \n"
|
||||
"punpckhwd %%xmm7,%%xmm9 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm8,%%xmm6 \n"
|
||||
"movdqa %%xmm9,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n" // Write back U channel
|
||||
"movhpd %%xmm0,(%2) \n" // Write back V channel
|
||||
"punpckhdq %%xmm4,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movlpd %%xmm2,(%1) \n"
|
||||
"movhpd %%xmm2,(%2) \n"
|
||||
"punpckhdq %%xmm6,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm1,%%xmm8 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movlpd %%xmm1,(%1) \n"
|
||||
"movhpd %%xmm1,(%2) \n"
|
||||
"punpckhdq %%xmm5,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm3,%%xmm8 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movlpd %%xmm3,(%1) \n"
|
||||
"movhpd %%xmm3,(%2) \n"
|
||||
"punpckhdq %%xmm7,%%xmm8 \n"
|
||||
"sub $0x8,%3 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"((intptr_t)(src_stride)), // %4
|
||||
"r"((intptr_t)(dst_stride_a)), // %5
|
||||
"r"((intptr_t)(dst_stride_b)) // %6
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9"
|
||||
);
|
||||
// Transposes a band of 8 rows of interleaved two-channel data (U and V, per
// the store comments below) and splits the result into two planes.
//
// Per iteration the asm below:
//   - loads 16 bytes from each of 8 consecutive source rows (pitch src_stride),
//   - interleaves them in three rounds (bytes, then 16-bit words, then 32-bit
//     dwords),
//   - stores 8 rows of 8 bytes to dst_a (low register halves, movlpd, pitch
//     dst_stride_a) and 8 rows of 8 bytes to dst_b (high register halves,
//     movhpd, pitch dst_stride_b),
//   - steps src forward by 16 bytes and each destination forward by 8 rows.
// width is decremented by 8 per iteration and the loop repeats while the
// result is greater than zero, so the body always runs at least once.
// src, dst_a, dst_b and width are read-write asm operands; xmm0-xmm9 are
// clobbered.
void TransposeUVWx8_SSE2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap: interleave the bytes of each pair of adjacent
// rows. xmm8 is scratch: it receives the high-half interleave (punpckhbw),
// which is then copied over the odd-row register of the pair.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%4),%%xmm1 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqu (%0,%4),%%xmm3 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm3 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"movdqu (%0,%4),%%xmm5 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm5 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"movdqu (%0,%4),%%xmm7 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm8 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
// src has advanced 8 rows. The stride is negated temporarily so that the lea
// below can rewind src by 8 rows and step it 16 bytes to the right in one
// instruction; the second neg restores the stride.
"neg %4 \n"
|
||||
"lea 0x10(%0,%4,8),%0 \n"
|
||||
"punpckhbw %%xmm7,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm7 \n"
|
||||
"neg %4 \n"
|
||||
// Second round of bit swap: interleave 16-bit words. xmm8 and xmm9 hold the
// high-half results (punpckhwd) until they are copied back.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"movdqa %%xmm1,%%xmm9 \n"
|
||||
"punpckhwd %%xmm2,%%xmm8 \n"
|
||||
"punpckhwd %%xmm3,%%xmm9 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm2 \n"
|
||||
"movdqa %%xmm9,%%xmm3 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"movdqa %%xmm5,%%xmm9 \n"
|
||||
"punpckhwd %%xmm6,%%xmm8 \n"
|
||||
"punpckhwd %%xmm7,%%xmm9 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm8,%%xmm6 \n"
|
||||
"movdqa %%xmm9,%%xmm7 \n"
|
||||
// Third round of bit swap: interleave 32-bit dwords.
|
||||
// Write to the destination pointer.
// After each dword interleave the low 8 bytes of the register go to dst_a
// and the high 8 bytes go to dst_b; each destination advances two rows per lea.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n" // Write back U channel
|
||||
"movhpd %%xmm0,(%2) \n" // Write back V channel
|
||||
"punpckhdq %%xmm4,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movlpd %%xmm2,(%1) \n"
|
||||
"movhpd %%xmm2,(%2) \n"
|
||||
"punpckhdq %%xmm6,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm1,%%xmm8 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movlpd %%xmm1,(%1) \n"
|
||||
"movhpd %%xmm1,(%2) \n"
|
||||
"punpckhdq %%xmm5,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm3,%%xmm8 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movlpd %%xmm3,(%1) \n"
|
||||
"movhpd %%xmm3,(%2) \n"
|
||||
"punpckhdq %%xmm7,%%xmm8 \n"
|
||||
// 8 columns consumed. The stores and lea instructions that follow do not
// modify the flags, so the jg at the end of the loop tests the result of
// this sub.
"sub $0x8,%3 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"((intptr_t)(src_stride)), // %4
|
||||
"r"((intptr_t)(dst_stride_a)), // %5
|
||||
"r"((intptr_t)(dst_stride_b)) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7", "xmm8", "xmm9");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
@ -18,18 +18,20 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
|
||||
(__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__asm__ __volatile__ (
|
||||
void TransposeWx8_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
@ -38,8 +40,8 @@ void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
// dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
@ -65,8 +67,8 @@ void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
"bnez %[width], 1b \n"
|
||||
" addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
// dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
@ -92,23 +94,20 @@ void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
"swr $s1, 4(%[dst]) \n"
|
||||
"swl $s1, 7(%[dst]) \n"
|
||||
"bnez %[width], 11b \n"
|
||||
"addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"2: \n"
|
||||
"addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1"
|
||||
);
|
||||
: [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
|
||||
: [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1");
|
||||
}
|
||||
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__asm__ __volatile__ (
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
__asm__ __volatile__(
|
||||
".set noat \n"
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
@ -126,67 +125,67 @@ void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
// dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
@ -207,67 +206,67 @@ void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
"bnez $AT, 1b \n"
|
||||
" addu %[dst], $s2, %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
// dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
@ -298,34 +297,33 @@ void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
".set at \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
|
||||
);
|
||||
: [src] "+r"(src), [dst] "+r"(dst), [width] "+r"(width)
|
||||
: [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
|
||||
"s2", "s3", "s4", "s5", "s6", "s7");
|
||||
}
|
||||
|
||||
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void TransposeUVWx8_DSPR2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
"subu $t7, $t9, %[src_stride] \n"
|
||||
"srl $t1, %[width], 1 \n"
|
||||
|
||||
// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
|
||||
// check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
|
||||
"andi $t0, %[dst_a], 0x3 \n"
|
||||
"andi $t8, %[dst_b], 0x3 \n"
|
||||
"or $t0, $t0, $t8 \n"
|
||||
@ -335,52 +333,52 @@ void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
"or $t0, $t0, $t8 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" nop \n"
|
||||
// dst + dst_stride word aligned (both, a & b dst addresses)
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
// dst + dst_stride word aligned (both, a & b dst addresses)
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"sw $s3, 0($s5) \n"
|
||||
"sw $s4, 0($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"sw $s3, 0(%[dst_a]) \n"
|
||||
"sw $s4, 0(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
"sw $s3, 4($s5) \n"
|
||||
"sw $s4, 4($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
@ -394,59 +392,59 @@ void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
"b 2f \n"
|
||||
" nop \n"
|
||||
|
||||
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"swr $s3, 0($s5) \n"
|
||||
"swl $s3, 3($s5) \n"
|
||||
"swr $s4, 0($s6) \n"
|
||||
"swl $s4, 3($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"swr $s3, 0(%[dst_a]) \n"
|
||||
"swl $s3, 3(%[dst_a]) \n"
|
||||
"swr $s4, 0(%[dst_b]) \n"
|
||||
"swl $s4, 3(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B7|A7|B6|A6|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
|
||||
"swr $s3, 4($s5) \n"
|
||||
"swl $s3, 7($s5) \n"
|
||||
"swr $s4, 4($s6) \n"
|
||||
"swl $s4, 7($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
@ -462,18 +460,11 @@ void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
: [src] "+r" (src),
|
||||
[dst_a] "+r" (dst_a),
|
||||
[dst_b] "+r" (dst_b),
|
||||
[width] "+r" (width),
|
||||
[src_stride] "+r" (src_stride)
|
||||
: [dst_stride_a] "r" (dst_stride_a),
|
||||
[dst_stride_b] "r" (dst_stride_b)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3",
|
||||
"s4", "s5", "s6"
|
||||
);
|
||||
: [src] "+r"(src), [dst_a] "+r"(dst_a), [dst_b] "+r"(dst_b),
|
||||
[width] "+r"(width), [src_stride] "+r"(src_stride)
|
||||
: [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
|
||||
"s2", "s3", "s4", "s5", "s6");
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
|
||||
@ -21,11 +21,13 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
|
||||
!defined(__aarch64__)
|
||||
|
||||
static uvec8 kVTbl4x4Transpose =
|
||||
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
|
||||
static uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
|
||||
2, 6, 10, 14, 3, 7, 11, 15};
|
||||
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
void TransposeWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
asm volatile (
|
||||
@ -240,12 +242,15 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
static uvec8 kVTbl4x4TransposeDi =
|
||||
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
|
||||
static uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11,
|
||||
4, 12, 5, 13, 6, 14, 7, 15};
|
||||
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void TransposeUVWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
asm volatile (
|
||||
|
||||
@ -21,13 +21,16 @@ extern "C" {
|
||||
// This module is for GCC Neon armv8 64 bit.
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
static uvec8 kVTbl4x4Transpose =
|
||||
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
|
||||
static uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
|
||||
2, 6, 10, 14, 3, 7, 11, 15};
|
||||
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
void TransposeWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
int64 width64 = (int64) width; // Work around clang 3.4 warning.
|
||||
int64 width64 = (int64)width; // Work around clang 3.4 warning.
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
@ -247,16 +250,19 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
static uint8 kVTbl4x4TransposeDi[32] =
|
||||
{ 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
|
||||
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
|
||||
static uint8 kVTbl4x4TransposeDi[32] = {
|
||||
0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
|
||||
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
|
||||
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void TransposeUVWx8_NEON(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
int64 width64 = (int64) width; // Work around clang 3.4 warning.
|
||||
int64 width64 = (int64)width; // Work around clang 3.4 warning.
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
|
||||
@ -19,15 +19,17 @@ extern "C" {
|
||||
// This module is for 32 bit Visual C x86 and clangcl
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
|
||||
__declspec(naked)
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__declspec(naked) void TransposeWx8_SSSE3(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
__asm {
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // src
|
||||
mov edi, [esp + 12 + 8] // src_stride
|
||||
mov eax, [esp + 12 + 4] // src
|
||||
mov edi, [esp + 12 + 8] // src_stride
|
||||
mov edx, [esp + 12 + 12] // dst
|
||||
mov esi, [esp + 12 + 16] // dst_stride
|
||||
mov ecx, [esp + 12 + 20] // width
|
||||
@ -110,18 +112,20 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int w) {
|
||||
__declspec(naked) void TransposeUVWx8_SSE2(const uint8* src,
|
||||
int src_stride,
|
||||
uint8* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8* dst_b,
|
||||
int dst_stride_b,
|
||||
int w) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // src
|
||||
mov edi, [esp + 16 + 8] // src_stride
|
||||
mov eax, [esp + 16 + 4] // src
|
||||
mov edi, [esp + 16 + 8] // src_stride
|
||||
mov edx, [esp + 16 + 12] // dst_a
|
||||
mov esi, [esp + 16 + 16] // dst_stride_a
|
||||
mov ebx, [esp + 16 + 20] // dst_b
|
||||
@ -134,8 +138,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
|
||||
align 4
|
||||
convertloop:
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + edi]
|
||||
lea eax, [eax + 2 * edi]
|
||||
@ -162,7 +166,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea eax, [eax + 2 * edi]
|
||||
movdqu [esp], xmm5 // backup xmm5
|
||||
neg edi
|
||||
movdqa xmm5, xmm6 // use xmm5 as temp register.
|
||||
movdqa xmm5, xmm6 // use xmm5 as temp register.
|
||||
punpcklbw xmm6, xmm7
|
||||
punpckhbw xmm5, xmm7
|
||||
movdqa xmm7, xmm5
|
||||
@ -183,7 +187,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
movdqa xmm6, xmm5
|
||||
movdqu xmm5, [esp] // restore xmm5
|
||||
movdqu [esp], xmm6 // backup xmm6
|
||||
movdqa xmm6, xmm5 // use xmm6 as temp register.
|
||||
movdqa xmm6, xmm5 // use xmm6 as temp register.
|
||||
punpcklwd xmm5, xmm7
|
||||
punpckhwd xmm6, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
@ -200,7 +204,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm4
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm2 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm2 // use xmm0 as the temp register.
|
||||
punpckldq xmm2, xmm6
|
||||
movlpd qword ptr [edx], xmm2
|
||||
movhpd qword ptr [ebx], xmm2
|
||||
@ -209,7 +213,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm0
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm1 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm1 // use xmm0 as the temp register.
|
||||
punpckldq xmm1, xmm5
|
||||
movlpd qword ptr [edx], xmm1
|
||||
movhpd qword ptr [ebx], xmm1
|
||||
@ -218,7 +222,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm0
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm3 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm3 // use xmm0 as the temp register.
|
||||
punpckldq xmm3, xmm7
|
||||
movlpd qword ptr [edx], xmm3
|
||||
movhpd qword ptr [ebx], xmm3
|
||||
|
||||
@ -23,26 +23,26 @@ extern "C" {
|
||||
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
|
||||
|
||||
// Any 4 planes to 1 with yuvconstants
|
||||
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
const uint8* a_buf, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 5]); \
|
||||
memset(temp, 0, 64 * 4); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 192, a_buf + n, r); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
|
||||
yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
const uint8* a_buf, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 5]); \
|
||||
memset(temp, 0, 64 * 4); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 192, a_buf + n, r); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
|
||||
yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
|
||||
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
@ -59,23 +59,23 @@ ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
|
||||
#undef ANY41C
|
||||
|
||||
// Any 3 planes to 1.
|
||||
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
#ifdef HAS_I422TOYUY2ROW_SSE2
|
||||
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
|
||||
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
|
||||
@ -104,28 +104,27 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
|
||||
// on arm that subsamples 444 to 422 internally.
|
||||
// Any 3 planes to 1 with yuvconstants
|
||||
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
if (width & 1) { \
|
||||
temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
|
||||
temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
|
||||
} \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
|
||||
yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
|
||||
uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 4]); \
|
||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n, r); \
|
||||
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
|
||||
if (width & 1) { \
|
||||
temp[64 + SS(r, UVSHIFT)] = temp[64 + SS(r, UVSHIFT) - 1]; \
|
||||
temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \
|
||||
} \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
|
||||
SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
|
||||
@ -179,22 +178,22 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
|
||||
#undef ANY31C
|
||||
|
||||
// Any 2 planes to 1.
|
||||
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
|
||||
uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
|
||||
SS(r, UVSHIFT) * SBPP2); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
|
||||
SS(r, UVSHIFT) * SBPP2); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
// Merge functions.
|
||||
#ifdef HAS_MERGEUVROW_SSE2
|
||||
@ -256,23 +255,22 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
|
||||
#undef ANY21
|
||||
|
||||
// Any 2 planes to 1 with yuvconstants
|
||||
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
|
||||
uint8* dst_ptr, const struct YuvConstants* yuvconstants, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
|
||||
SS(r, UVSHIFT) * SBPP2); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
|
||||
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, y_buf + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
|
||||
SS(r, UVSHIFT) * SBPP2); \
|
||||
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
// Biplanar to RGB.
|
||||
#ifdef HAS_NV12TOARGBROW_SSSE3
|
||||
@ -305,19 +303,19 @@ ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
|
||||
#undef ANY21C
|
||||
|
||||
// Any 1 to 1.
|
||||
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_COPYROW_AVX
|
||||
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
|
||||
@ -500,20 +498,20 @@ ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
|
||||
#undef ANY11
|
||||
|
||||
// Any 1 to 1 blended. Destination is read, modify, write.
|
||||
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \
|
||||
ANY_SIMD(temp, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \
|
||||
ANY_SIMD(temp, temp + 128, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_ARGBCOPYALPHAROW_AVX2
|
||||
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
|
||||
@ -530,32 +528,43 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
|
||||
#undef ANY11B
|
||||
|
||||
// Any 1 to 1 with parameter.
|
||||
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
T shuffler, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
}
|
||||
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, T shuffler, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
}
|
||||
|
||||
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
|
||||
const uint32, 4, 2, 3)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
|
||||
ARGBToRGB565DitherRow_SSE2,
|
||||
const uint32,
|
||||
4,
|
||||
2,
|
||||
3)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
|
||||
const uint32, 4, 2, 7)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
|
||||
ARGBToRGB565DitherRow_AVX2,
|
||||
const uint32,
|
||||
4,
|
||||
2,
|
||||
7)
|
||||
#endif
|
||||
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
|
||||
const uint32, 4, 2, 7)
|
||||
ANY11P(ARGBToRGB565DitherRow_Any_NEON,
|
||||
ARGBToRGB565DitherRow_NEON,
|
||||
const uint32,
|
||||
4,
|
||||
2,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBSHUFFLEROW_SSE2
|
||||
ANY11P(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, const uint8*, 4, 4, 3)
|
||||
@ -572,20 +581,20 @@ ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8*, 4, 4, 3)
|
||||
#undef ANY11P
|
||||
|
||||
// Any 1 to 1 with parameter and shorts. BPP measures in shorts.
|
||||
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, \
|
||||
T shuffler, int width) { \
|
||||
SIMD_ALIGNED(uint16 temp[32 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
}
|
||||
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T shuffler, \
|
||||
int width) { \
|
||||
SIMD_ALIGNED(uint16 temp[32 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_HALFFLOATROW_SSE2
|
||||
ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 1, 1, 7)
|
||||
@ -604,20 +613,20 @@ ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 1, 1, 7)
|
||||
#undef ANY11P16
|
||||
|
||||
// Any 1 to 1 with yuvconstants
|
||||
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
|
||||
const struct YuvConstants* yuvconstants, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 2]); \
|
||||
memset(temp, 0, 128); /* for YUY2 and msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
|
||||
ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
|
||||
@ -634,21 +643,20 @@ ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
|
||||
|
||||
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
||||
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
|
||||
ptrdiff_t src_stride_ptr, int width, \
|
||||
int source_y_fraction) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride_ptr, \
|
||||
int width, int source_y_fraction) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 3]); \
|
||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
|
||||
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_INTERPOLATEROW_AVX2
|
||||
ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31)
|
||||
@ -665,19 +673,19 @@ ANY11T(InterpolateRow_Any_DSPR2, InterpolateRow_DSPR2, 1, 1, 3)
|
||||
#undef ANY11T
|
||||
|
||||
// Any 1 to 1 mirror.
|
||||
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr, r * BPP); \
|
||||
ANY_SIMD(temp, temp + 64, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
|
||||
}
|
||||
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64 * 2]); \
|
||||
memset(temp, 0, 64); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr, r* BPP); \
|
||||
ANY_SIMD(temp, temp + 64, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_MIRRORROW_AVX2
|
||||
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
||||
@ -706,17 +714,17 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
|
||||
#undef ANY11M
|
||||
|
||||
// Any 1 plane. (memset)
|
||||
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, T v32, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64]); \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(dst_ptr, v32, n); \
|
||||
} \
|
||||
ANY_SIMD(temp, v32, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp, r * BPP); \
|
||||
}
|
||||
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, T v32, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[64]); \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(dst_ptr, v32, n); \
|
||||
} \
|
||||
ANY_SIMD(temp, v32, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, temp, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SETROW_X86
|
||||
ANY1(SetRow_Any_X86, SetRow_X86, uint8, 1, 3)
|
||||
@ -730,20 +738,20 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32, 4, 3)
|
||||
#undef ANY1
|
||||
|
||||
// Any 1 to 2. Outputs UV planes.
|
||||
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) {\
|
||||
SIMD_ALIGNED(uint8 temp[128 * 3]); \
|
||||
memset(temp, 0, 128); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_u, dst_v, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
|
||||
memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
|
||||
memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
|
||||
}
|
||||
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint8* dst_u, uint8* dst_v, int width) { \
|
||||
SIMD_ALIGNED(uint8 temp[128 * 3]); \
|
||||
memset(temp, 0, 128); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_u, dst_v, n); \
|
||||
} \
|
||||
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
|
||||
ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
|
||||
memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
|
||||
memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SPLITUVROW_SSE2
|
||||
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
|
||||
@ -781,29 +789,29 @@ ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
|
||||
|
||||
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
|
||||
// 128 byte row allows for 32 avx ARGB pixels.
|
||||
// Generates NAMEANY, an any-width wrapper around the two-source-row kernel
// ANY_SIMD(src, src_stride, dst_u, dst_v, count).
//   UVSHIFT - shift applied to the element count when indexing the source.
//   BPP     - source bytes per (shifted) element.
//   MASK    - kernel granularity minus one.
// The scratch buffer is four 128-byte regions: source row 0, source row 1,
// U output and V output. The kernel is given a stride of 128 so that it
// finds row 1 directly after row 0.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK)                         \
  void NAMEANY(const uint8* src_ptr, int src_stride_ptr, uint8* dst_u,        \
               uint8* dst_v, int width) {                                     \
    SIMD_ALIGNED(uint8 scratch[128 * 4]);                                     \
    uint8* const row0 = scratch;                                              \
    uint8* const row1 = scratch + 128;                                        \
    uint8* const tmp_u = scratch + 256;                                       \
    uint8* const tmp_v = scratch + 384;                                       \
    const int tail = width & MASK;                                            \
    const int body = width & ~MASK;                                           \
    const int src_offset = (body >> UVSHIFT) * BPP;                           \
    const int tail_bytes = SS(tail, UVSHIFT) * BPP;                           \
    memset(scratch, 0, 128 * 2); /* for msan */                               \
    if (body > 0) {                                                           \
      ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, body);                  \
    }                                                                         \
    memcpy(row0, src_ptr + src_offset, tail_bytes);                           \
    memcpy(row1, src_ptr + src_stride_ptr + src_offset, tail_bytes);          \
    if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */  \
      memcpy(row0 + tail_bytes, row0 + tail_bytes - BPP, BPP);                \
      memcpy(row1 + tail_bytes, row1 + tail_bytes - BPP, BPP);                \
    }                                                                         \
    ANY_SIMD(row0, 128, tmp_u, tmp_v, MASK + 1);                              \
    memcpy(dst_u + (body >> 1), tmp_u, SS(tail, 1));                          \
    memcpy(dst_v + (body >> 1), tmp_v, SS(tail, 1));                          \
  }
|
||||
|
||||
#ifdef HAS_ARGBTOUVROW_AVX2
|
||||
ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,8 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
|
||||
// This module is for GCC x86 and x64.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
@ -2566,7 +2568,7 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
"vpsrlw $0x8,%%ymm5,%%ymm5 \n"
|
||||
"sub %1,%2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
"vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
|
||||
"lea " MEMLEA(0x40,0) ",%0 \n"
|
||||
@ -2603,7 +2605,7 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
"psrlw $0x8,%%xmm5 \n"
|
||||
"sub %1,%2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
@ -2637,7 +2639,7 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
asm volatile (
|
||||
"sub %0,%1 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
|
||||
MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
@ -2668,7 +2670,7 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
asm volatile (
|
||||
"sub %0,%1 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
@ -5146,7 +5148,7 @@ void I422ToYUY2Row_SSE2(const uint8* src_y,
|
||||
asm volatile (
|
||||
"sub %1,%2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"movq " MEMACCESS(1) ",%%xmm2 \n"
|
||||
MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
|
||||
"lea " MEMLEA(0x8,1) ",%1 \n"
|
||||
@ -5181,7 +5183,7 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
|
||||
asm volatile (
|
||||
"sub %1,%2 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"movq " MEMACCESS(1) ",%%xmm2 \n"
|
||||
MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3
|
||||
"lea " MEMLEA(0x8,1) ",%1 \n"
|
||||
@ -5602,6 +5604,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
// clang-format on
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
|
||||
1331
source/row_mips.cc
1331
source/row_mips.cc
File diff suppressed because it is too large
Load Diff
@ -20,108 +20,111 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Fill YUV -> RGB conversion constants into vectors
|
||||
// Fills the eight output vectors from the conversion constants reachable
// through the pointer `yuvconst`:
//   ub <- kUVToB[0]    vr <- kUVToR[1]
//   ug <- kUVToG[0]    vg <- kUVToG[1]
//   bb <- kUVBiasB[0]  bg <- kUVBiasG[0]  br <- kUVBiasR[0]
//   yg <- kYToRgb[0]
// `yuvconst` is parenthesised so that any pointer-valued expression (not
// just a plain identifier) can be passed. The other arguments are assigned
// to and must be lvalues.
#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) \
  {                                                              \
    ub = __msa_fill_w((yuvconst)->kUVToB[0]);                    \
    vr = __msa_fill_w((yuvconst)->kUVToR[1]);                    \
    ug = __msa_fill_w((yuvconst)->kUVToG[0]);                    \
    vg = __msa_fill_w((yuvconst)->kUVToG[1]);                    \
    bb = __msa_fill_w((yuvconst)->kUVBiasB[0]);                  \
    bg = __msa_fill_w((yuvconst)->kUVBiasG[0]);                  \
    br = __msa_fill_w((yuvconst)->kUVBiasR[0]);                  \
    yg = __msa_fill_w((yuvconst)->kYToRgb[0]);                   \
  }
|
||||
|
||||
// Load YUV 422 pixel data
|
||||
// Loads one 64-bit word from psrc_y and one 32-bit word each from psrc_u and
// psrc_v, and places each in element 0 of an otherwise zero vector, which is
// stored to out_y / out_u / out_v (as v16u8).
#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v)        \
  {                                                                    \
    v4i32 zero_m = {0};                                                \
    uint64 y_word_m = LD(psrc_y);                                      \
    uint32 u_word_m = LW(psrc_u);                                      \
    uint32 v_word_m = LW(psrc_v);                                      \
    out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64)y_word_m);  \
    out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32)u_word_m);         \
    out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32)v_word_m);         \
  }
|
||||
|
||||
// Convert 8 pixels of YUV 420 to RGB.
|
||||
// Converts the Y/U/V samples held in in_y, in_u, in_v to three vectors of
// 16-bit B, G and R values (out_b, out_g, out_r).
//   ub, vr, ug, vg - v4i32 chroma coefficients.
//   bb, bg, br     - v4i32 per-channel biases.
//   yg             - v4i32 luma coefficient.
// Side effect: in_u and in_v are overwritten with their byte-interleaved
// form, so they must be lvalues and must not be reused as the original data.
// The luma scale uses the `yg` parameter; it must not refer to a variable
// from the caller's scope.
#define YUVTORGB(in_y, in_u, in_v, ub, vr, ug, vg, bb, bg, br, yg, out_b, \
                 out_g, out_r)                                             \
  {                                                                        \
    v8i16 vec0_m;                                                          \
    v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m;                          \
    v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m;                          \
    v4i32 max_val_m = __msa_ldi_w(255);                                    \
    v8i16 zero_m = {0};                                                    \
                                                                           \
    /* Interleave each input with itself (byte-wise). */                   \
    in_u = (v16u8)__msa_ilvr_b((v16i8)in_u, (v16i8)in_u);                  \
    in_v = (v16u8)__msa_ilvr_b((v16i8)in_v, (v16i8)in_v);                  \
    vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y);                \
    /* Widen Y to 32 bits, scale by yg and keep the upper 16 bits. */      \
    reg0_m = (v4i32)__msa_ilvr_h(zero_m, vec0_m);                          \
    reg1_m = (v4i32)__msa_ilvl_h(zero_m, vec0_m);                          \
    reg0_m *= yg;                                                          \
    reg1_m *= yg;                                                          \
    reg0_m = __msa_srai_w(reg0_m, 16);                                     \
    reg1_m = __msa_srai_w(reg1_m, 16);                                     \
    /* Per-channel bias: reg0/1 = B, reg2/3 = G, reg4/5 = R. */            \
    reg4_m = reg0_m + br;                                                  \
    reg5_m = reg1_m + br;                                                  \
    reg2_m = reg0_m + bg;                                                  \
    reg3_m = reg1_m + bg;                                                  \
    reg0_m += bb;                                                          \
    reg1_m += bb;                                                          \
    /* Widen U (reg6/7) and V (reg8/9) to 32 bits. */                      \
    vec0_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_u);              \
    reg6_m = (v4i32)__msa_ilvr_h(zero_m, (v8i16)vec0_m);                   \
    reg7_m = (v4i32)__msa_ilvl_h(zero_m, (v8i16)vec0_m);                   \
    vec0_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_v);              \
    reg8_m = (v4i32)__msa_ilvr_h(zero_m, (v8i16)vec0_m);                   \
    reg9_m = (v4i32)__msa_ilvl_h(zero_m, (v8i16)vec0_m);                   \
    /* Subtract the chroma contributions. */                               \
    reg0_m -= reg6_m * ub;                                                 \
    reg1_m -= reg7_m * ub;                                                 \
    reg2_m -= reg6_m * ug;                                                 \
    reg3_m -= reg7_m * ug;                                                 \
    reg4_m -= reg8_m * vr;                                                 \
    reg5_m -= reg9_m * vr;                                                 \
    reg2_m -= reg8_m * vg;                                                 \
    reg3_m -= reg9_m * vg;                                                 \
    /* Drop the 6 fractional bits. */                                      \
    reg0_m = __msa_srai_w(reg0_m, 6);                                      \
    reg1_m = __msa_srai_w(reg1_m, 6);                                      \
    reg2_m = __msa_srai_w(reg2_m, 6);                                      \
    reg3_m = __msa_srai_w(reg3_m, 6);                                      \
    reg4_m = __msa_srai_w(reg4_m, 6);                                      \
    reg5_m = __msa_srai_w(reg5_m, 6);                                      \
    /* Clamp to 0..255: max with 0, then min with max_val_m. */            \
    reg0_m = __msa_maxi_s_w(reg0_m, 0);                                    \
    reg1_m = __msa_maxi_s_w(reg1_m, 0);                                    \
    reg2_m = __msa_maxi_s_w(reg2_m, 0);                                    \
    reg3_m = __msa_maxi_s_w(reg3_m, 0);                                    \
    reg4_m = __msa_maxi_s_w(reg4_m, 0);                                    \
    reg5_m = __msa_maxi_s_w(reg5_m, 0);                                    \
    reg0_m = __msa_min_s_w(reg0_m, max_val_m);                             \
    reg1_m = __msa_min_s_w(reg1_m, max_val_m);                             \
    reg2_m = __msa_min_s_w(reg2_m, max_val_m);                             \
    reg3_m = __msa_min_s_w(reg3_m, max_val_m);                             \
    reg4_m = __msa_min_s_w(reg4_m, max_val_m);                             \
    reg5_m = __msa_min_s_w(reg5_m, max_val_m);                             \
    /* Pack the 32-bit results down to 16-bit lanes. */                    \
    out_b = __msa_pckev_h((v8i16)reg1_m, (v8i16)reg0_m);                   \
    out_g = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m);                   \
    out_r = __msa_pckev_h((v8i16)reg5_m, (v8i16)reg4_m);                   \
  }
|
||||
|
||||
// Pack and Store 8 ARGB values.
|
||||
// Interleaves the even bytes of in0..in3 into 4-byte groups (in0 first, in3
// last) and stores the two resulting 16-byte vectors at pdst_argb and
// pdst_argb + 16.
// The vector arguments are parenthesised before casting so that expression
// arguments are cast as a whole.
#define STOREARGB(in0, in1, in2, in3, pdst_argb)                \
  {                                                             \
    v8i16 vec0_m, vec1_m;                                       \
    v16u8 dst0_m, dst1_m;                                       \
    /* Pair up in0/in1 and in2/in3 byte-wise. */                \
    vec0_m = (v8i16)__msa_ilvev_b((v16i8)(in1), (v16i8)(in0));  \
    vec1_m = (v8i16)__msa_ilvev_b((v16i8)(in3), (v16i8)(in2));  \
    /* Merge the pairs halfword-wise into 4-byte groups. */     \
    dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m);               \
    dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m);               \
    ST_UB2(dst0_m, dst1_m, pdst_argb, 16);                      \
  }
|
||||
|
||||
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1, dst2, dst3;
|
||||
v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
|
||||
v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||
src += width - 64;
|
||||
|
||||
for (x = 0; x < width; x += 64) {
|
||||
@ -138,7 +141,7 @@ void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1, dst2, dst3;
|
||||
v16i8 shuffler = { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };
|
||||
v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
|
||||
src += width * 4 - 64;
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
@ -199,22 +202,25 @@ void I422ToUYVYRow_MSA(const uint8* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) {
|
||||
void I422ToARGBRow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v16u8 const_255 = (v16u8) __msa_ldi_b(255);
|
||||
v16u8 const_255 = (v16u8)__msa_ldi_b(255);
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
STOREARGB(vec0, vec1, vec2, const_255, rgb_buf);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -223,22 +229,25 @@ void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
}
|
||||
|
||||
void YUVTORGBARow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) {
|
||||
void YUVTORGBARow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v16u8 const_255 = (v16u8) __msa_ldi_b(255);
|
||||
v16u8 const_255 = (v16u8)__msa_ldi_b(255);
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
STOREARGB(const_255, vec0, vec1, vec2, rgb_buf);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -247,8 +256,10 @@ void YUVTORGBARow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, const uint8* src_a,
|
||||
void I422AlphaToARGBRow_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
const uint8* src_a,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
@ -257,18 +268,18 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v4i32 zero = { 0 };
|
||||
v4i32 zero = {0};
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
data_a = LD(src_a);
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
src3 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_a);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3);
|
||||
src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3);
|
||||
STOREARGB(vec0, vec1, vec2, src3, rgb_buf);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -278,44 +289,47 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
}
|
||||
|
||||
void YUVTORGB24Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int32 width) {
|
||||
void YUVTORGB24Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int32 width) {
|
||||
int x;
|
||||
int64 data_u, data_v;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2;
|
||||
v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v16u8 reg0, reg1, reg2, reg3;
|
||||
v2i64 zero = { 0 };
|
||||
v16i8 shuffler0 = { 0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10 };
|
||||
v16i8 shuffler1 = { 0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10 };
|
||||
v16i8 shuffler2 =
|
||||
{ 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 };
|
||||
v2i64 zero = {0};
|
||||
v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
|
||||
v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10};
|
||||
v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10,
|
||||
11, 29, 12, 13, 30, 14, 15, 31};
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0);
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0);
|
||||
data_u = LD(src_u);
|
||||
data_v = LD(src_v);
|
||||
src1 = (v16u8) __msa_insert_d(zero, 0, data_u);
|
||||
src2 = (v16u8) __msa_insert_d(zero, 0, data_v);
|
||||
src3 = (v16u8) __msa_sldi_b((v16i8) src0, (v16i8) src0, 8);
|
||||
src4 = (v16u8) __msa_sldi_b((v16i8) src1, (v16i8) src1, 4);
|
||||
src5 = (v16u8) __msa_sldi_b((v16i8) src2, (v16i8) src2, 4);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
YUVTORGB(src3, src4, src5, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec3, vec4, vec5);
|
||||
reg0 = (v16u8) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
|
||||
reg2 = (v16u8) __msa_ilvev_b((v16i8) vec4, (v16i8) vec3);
|
||||
reg3 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec2);
|
||||
reg1 = (v16u8) __msa_sldi_b((v16i8) reg2, (v16i8) reg0, 11);
|
||||
dst0 = (v16u8) __msa_vshf_b(shuffler0, (v16i8) reg3, (v16i8) reg0);
|
||||
dst1 = (v16u8) __msa_vshf_b(shuffler1, (v16i8) reg3, (v16i8) reg1);
|
||||
dst2 = (v16u8) __msa_vshf_b(shuffler2, (v16i8) reg3, (v16i8) reg2);
|
||||
src1 = (v16u8)__msa_insert_d(zero, 0, data_u);
|
||||
src2 = (v16u8)__msa_insert_d(zero, 0, data_v);
|
||||
src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8);
|
||||
src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 4);
|
||||
src5 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
YUVTORGB(src3, src4, src5, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec3, vec4, vec5);
|
||||
reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
|
||||
reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3);
|
||||
reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2);
|
||||
reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11);
|
||||
dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0);
|
||||
dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1);
|
||||
dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2);
|
||||
ST_UB2(dst0, dst1, rgb_buf, 16);
|
||||
ST_UB(dst2, (rgb_buf + 32));
|
||||
src_y += 16;
|
||||
@ -326,28 +340,31 @@ void YUVTORGB24Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
|
||||
void YUVTORGB565Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants, int width) {
|
||||
void YUVTORGB565Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, dst0;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec2, vec1);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec2, vec1);
|
||||
vec0 = __msa_srai_h(vec0, 3);
|
||||
vec1 = __msa_srai_h(vec1, 3);
|
||||
vec2 = __msa_srai_h(vec2, 2);
|
||||
vec1 = __msa_slli_h(vec1, 11);
|
||||
vec2 = __msa_slli_h(vec2, 5);
|
||||
vec0 |= vec1;
|
||||
dst0 = (v16u8) (vec2 | vec0);
|
||||
dst0 = (v16u8)(vec2 | vec0);
|
||||
ST_UB(dst0, dst_rgb565);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -357,31 +374,34 @@ void YUVTORGB565Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
|
||||
void I422ToARGB4444Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_argb4444,
|
||||
const struct YuvConstants* yuvconstants, int width) {
|
||||
void I422ToARGB4444Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb4444,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, dst0;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v8u16 reg0, reg1, reg2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v8u16 const_0xF000 = (v8u16) __msa_fill_h(0xF000);
|
||||
v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000);
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
reg0 = (v8u16) __msa_srai_h(vec0, 4);
|
||||
reg1 = (v8u16) __msa_srai_h(vec1, 4);
|
||||
reg2 = (v8u16) __msa_srai_h(vec2, 4);
|
||||
reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 4);
|
||||
reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 8);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
reg0 = (v8u16)__msa_srai_h(vec0, 4);
|
||||
reg1 = (v8u16)__msa_srai_h(vec1, 4);
|
||||
reg2 = (v8u16)__msa_srai_h(vec2, 4);
|
||||
reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 4);
|
||||
reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 8);
|
||||
reg1 |= const_0xF000;
|
||||
reg0 |= reg2;
|
||||
dst0 = (v16u8) (reg1 | reg0);
|
||||
dst0 = (v16u8)(reg1 | reg0);
|
||||
ST_UB(dst0, dst_argb4444);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -390,31 +410,34 @@ void I422ToARGB4444Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
}
|
||||
}
|
||||
|
||||
void I422ToARGB1555Row_MSA(const uint8* src_y, const uint8* src_u,
|
||||
const uint8* src_v, uint8* dst_argb1555,
|
||||
const struct YuvConstants* yuvconstants, int width) {
|
||||
void I422ToARGB1555Row_MSA(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb1555,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, dst0;
|
||||
v8i16 vec0, vec1, vec2;
|
||||
v8u16 reg0, reg1, reg2;
|
||||
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
|
||||
v8u16 const_0x8000 = (v8u16) __msa_fill_h(0x8000);
|
||||
v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000);
|
||||
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug,
|
||||
vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
|
||||
YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg);
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
READYUV422(src_y, src_u, src_v, src0, src1, src2);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
|
||||
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
|
||||
reg0 = (v8u16) __msa_srai_h(vec0, 3);
|
||||
reg1 = (v8u16) __msa_srai_h(vec1, 3);
|
||||
reg2 = (v8u16) __msa_srai_h(vec2, 3);
|
||||
reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 5);
|
||||
reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 10);
|
||||
YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
|
||||
vec_br, vec_yg, vec0, vec1, vec2);
|
||||
reg0 = (v8u16)__msa_srai_h(vec0, 3);
|
||||
reg1 = (v8u16)__msa_srai_h(vec1, 3);
|
||||
reg2 = (v8u16)__msa_srai_h(vec2, 3);
|
||||
reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5);
|
||||
reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10);
|
||||
reg1 |= const_0x8000;
|
||||
reg0 |= reg2;
|
||||
dst0 = (v16u8) (reg1 | reg0);
|
||||
dst0 = (v16u8)(reg1 | reg0);
|
||||
ST_UB(dst0, dst_argb1555);
|
||||
src_y += 8;
|
||||
src_u += 4;
|
||||
@ -429,16 +452,19 @@ void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
ST_UB2(dst0, dst1, dst_y, 16);
|
||||
src_yuy2 += 64;
|
||||
dst_y += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void YUY2ToUVRow_MSA(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void YUY2ToUVRow_MSA(const uint8* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_yuy2_next = src_yuy2 + src_stride_yuy2;
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
@ -447,14 +473,14 @@ void YUY2ToUVRow_MSA(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
|
||||
LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7);
|
||||
src0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
src1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
src2 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4);
|
||||
src3 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6);
|
||||
src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
|
||||
src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
|
||||
vec0 = __msa_aver_u_b(src0, src2);
|
||||
vec1 = __msa_aver_u_b(src1, src3);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);
|
||||
dst1 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
src_yuy2 += 64;
|
||||
@ -464,17 +490,19 @@ void YUY2ToUVRow_MSA(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
}
|
||||
}
|
||||
|
||||
void YUY2ToUV422Row_MSA(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
void YUY2ToUV422Row_MSA(const uint8* src_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, dst0, dst1;
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
|
||||
src0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
src1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
src_yuy2 += 64;
|
||||
@ -489,17 +517,20 @@ void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) {
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
|
||||
dst0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
dst1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
ST_UB2(dst0, dst1, dst_y, 16);
|
||||
src_uyvy += 64;
|
||||
dst_y += 32;
|
||||
}
|
||||
}
|
||||
|
||||
void UYVYToUVRow_MSA(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
const uint8 *src_uyvy_next = src_uyvy + src_stride_uyvy;
|
||||
void UYVYToUVRow_MSA(const uint8* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_uyvy_next = src_uyvy + src_stride_uyvy;
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 vec0, vec1, dst0, dst1;
|
||||
@ -507,14 +538,14 @@ void UYVYToUVRow_MSA(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
|
||||
LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7);
|
||||
src0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
src1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
src2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4);
|
||||
src3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6);
|
||||
src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
||||
src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
|
||||
vec0 = __msa_aver_u_b(src0, src2);
|
||||
vec1 = __msa_aver_u_b(src1, src3);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);
|
||||
dst1 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
src_uyvy += 64;
|
||||
@ -524,17 +555,19 @@ void UYVYToUVRow_MSA(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
}
|
||||
}
|
||||
|
||||
void UYVYToUV422Row_MSA(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
void UYVYToUV422Row_MSA(const uint8* src_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, dst0, dst1;
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
|
||||
src0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
src1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
src_uyvy += 64;
|
||||
@ -547,27 +580,27 @@ void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
|
||||
int x;
|
||||
v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
|
||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||
v16i8 zero = { 0 };
|
||||
v8u16 const_0x19 = (v8u16) __msa_ldi_h(0x19);
|
||||
v8u16 const_0x81 = (v8u16) __msa_ldi_h(0x81);
|
||||
v8u16 const_0x42 = (v8u16) __msa_ldi_h(0x42);
|
||||
v8u16 const_0x1080 = (v8u16) __msa_fill_h(0x1080);
|
||||
v16i8 zero = {0};
|
||||
v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19);
|
||||
v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81);
|
||||
v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42);
|
||||
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0);
|
||||
src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16);
|
||||
src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32);
|
||||
src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48);
|
||||
vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
vec2 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
vec3 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
reg0 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec0);
|
||||
reg1 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec1);
|
||||
reg2 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec2);
|
||||
reg3 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec3);
|
||||
reg4 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec0);
|
||||
reg5 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec1);
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
|
||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
reg0 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec0);
|
||||
reg1 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec1);
|
||||
reg2 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec2);
|
||||
reg3 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec3);
|
||||
reg4 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec0);
|
||||
reg5 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec1);
|
||||
reg0 *= const_0x19;
|
||||
reg1 *= const_0x19;
|
||||
reg2 *= const_0x81;
|
||||
@ -580,93 +613,96 @@ void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) {
|
||||
reg1 += reg5;
|
||||
reg0 += const_0x1080;
|
||||
reg1 += const_0x1080;
|
||||
reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 8);
|
||||
reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 8);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) reg1, (v16i8) reg0);
|
||||
reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8);
|
||||
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
|
||||
ST_UB(dst0, dst_y);
|
||||
src_argb0 += 64;
|
||||
dst_y += 16;
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBToUVRow_MSA(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGBToUVRow_MSA(const uint8* src_argb0,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
int x;
|
||||
const uint8* src_argb0_next = src_argb0 + src_stride_argb;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9;
|
||||
v16u8 dst0, dst1;
|
||||
v8u16 const_0x70 = (v8u16) __msa_ldi_h(0x70);
|
||||
v8u16 const_0x4A = (v8u16) __msa_ldi_h(0x4A);
|
||||
v8u16 const_0x26 = (v8u16) __msa_ldi_h(0x26);
|
||||
v8u16 const_0x5E = (v8u16) __msa_ldi_h(0x5E);
|
||||
v8u16 const_0x12 = (v8u16) __msa_ldi_h(0x12);
|
||||
v8u16 const_0x8080 = (v8u16) __msa_fill_h(0x8080);
|
||||
v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70);
|
||||
v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A);
|
||||
v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26);
|
||||
v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E);
|
||||
v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12);
|
||||
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
|
||||
|
||||
for (x = 0; x < width; x += 32) {
|
||||
src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0);
|
||||
src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16);
|
||||
src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32);
|
||||
src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48);
|
||||
src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 64);
|
||||
src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 80);
|
||||
src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 96);
|
||||
src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 112);
|
||||
vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4);
|
||||
vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6);
|
||||
vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4);
|
||||
vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6);
|
||||
vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);
|
||||
vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2);
|
||||
vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4);
|
||||
vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6);
|
||||
vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);
|
||||
vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2);
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64);
|
||||
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80);
|
||||
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96);
|
||||
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112);
|
||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
||||
vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
|
||||
vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
|
||||
vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
|
||||
vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
|
||||
vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
|
||||
vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
|
||||
vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
|
||||
vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
|
||||
reg0 = __msa_hadd_u_h(vec8, vec8);
|
||||
reg1 = __msa_hadd_u_h(vec9, vec9);
|
||||
reg2 = __msa_hadd_u_h(vec4, vec4);
|
||||
reg3 = __msa_hadd_u_h(vec5, vec5);
|
||||
reg4 = __msa_hadd_u_h(vec0, vec0);
|
||||
reg5 = __msa_hadd_u_h(vec1, vec1);
|
||||
src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 0);
|
||||
src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 16);
|
||||
src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 32);
|
||||
src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 48);
|
||||
src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 64);
|
||||
src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 80);
|
||||
src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 96);
|
||||
src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 112);
|
||||
vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
|
||||
vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
|
||||
vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4);
|
||||
vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6);
|
||||
vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);
|
||||
vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
|
||||
vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4);
|
||||
vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6);
|
||||
vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);
|
||||
vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2);
|
||||
vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4);
|
||||
vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6);
|
||||
vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);
|
||||
vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2);
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32);
|
||||
src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48);
|
||||
src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64);
|
||||
src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80);
|
||||
src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96);
|
||||
src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112);
|
||||
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
|
||||
vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
|
||||
vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4);
|
||||
vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6);
|
||||
vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
|
||||
vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
|
||||
vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4);
|
||||
vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6);
|
||||
vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
|
||||
vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2);
|
||||
vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4);
|
||||
vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
|
||||
vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
|
||||
vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2);
|
||||
reg0 += __msa_hadd_u_h(vec8, vec8);
|
||||
reg1 += __msa_hadd_u_h(vec9, vec9);
|
||||
reg2 += __msa_hadd_u_h(vec4, vec4);
|
||||
reg3 += __msa_hadd_u_h(vec5, vec5);
|
||||
reg4 += __msa_hadd_u_h(vec0, vec0);
|
||||
reg5 += __msa_hadd_u_h(vec1, vec1);
|
||||
reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 2);
|
||||
reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 2);
|
||||
reg2 = (v8u16) __msa_srai_h((v8i16) reg2, 2);
|
||||
reg3 = (v8u16) __msa_srai_h((v8i16) reg3, 2);
|
||||
reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 2);
|
||||
reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 2);
|
||||
reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 2);
|
||||
reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 2);
|
||||
reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 2);
|
||||
reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 2);
|
||||
reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 2);
|
||||
reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 2);
|
||||
reg6 = reg0 * const_0x70;
|
||||
reg7 = reg1 * const_0x70;
|
||||
reg8 = reg2 * const_0x4A;
|
||||
@ -689,12 +725,12 @@ void ARGBToUVRow_MSA(const uint8* src_argb0, int src_stride_argb,
|
||||
reg7 -= reg9;
|
||||
reg4 -= reg2;
|
||||
reg5 -= reg3;
|
||||
reg6 = (v8u16) __msa_srai_h((v8i16) reg6, 8);
|
||||
reg7 = (v8u16) __msa_srai_h((v8i16) reg7, 8);
|
||||
reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 8);
|
||||
reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 8);
|
||||
dst0 = (v16u8) __msa_pckev_b((v16i8) reg7, (v16i8) reg6);
|
||||
dst1 = (v16u8) __msa_pckev_b((v16i8) reg5, (v16i8) reg4);
|
||||
reg6 = (v8u16)__msa_srai_h((v8i16)reg6, 8);
|
||||
reg7 = (v8u16)__msa_srai_h((v8i16)reg7, 8);
|
||||
reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 8);
|
||||
reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 8);
|
||||
dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6);
|
||||
dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
|
||||
ST_UB(dst0, dst_u);
|
||||
ST_UB(dst1, dst_v);
|
||||
src_argb0 += 128;
|
||||
@ -704,7 +740,8 @@ void ARGBToUVRow_MSA(const uint8* src_argb0, int src_stride_argb,
|
||||
}
|
||||
}
|
||||
|
||||
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb,
|
||||
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
int x;
|
||||
v16u8 src0, src1;
|
||||
@ -712,20 +749,20 @@ void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb,
|
||||
v16u8 dst0, dst1, dst2, dst3;
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
src0 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 0);
|
||||
src1 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 16);
|
||||
vec0 = (v8u16) __msa_andi_b(src0, 0x0F);
|
||||
vec1 = (v8u16) __msa_andi_b(src1, 0x0F);
|
||||
vec2 = (v8u16) __msa_andi_b(src0, 0xF0);
|
||||
vec3 = (v8u16) __msa_andi_b(src1, 0xF0);
|
||||
vec0 |= (v8u16) __msa_slli_b((v16i8) vec0, 4);
|
||||
vec1 |= (v8u16) __msa_slli_b((v16i8) vec1, 4);
|
||||
vec2 |= (v8u16) __msa_srli_b((v16i8) vec2, 4);
|
||||
vec3 |= (v8u16) __msa_srli_b((v16i8) vec3, 4);
|
||||
dst0 = (v16u8) __msa_ilvr_b((v16i8) vec2, (v16i8) vec0);
|
||||
dst1 = (v16u8) __msa_ilvl_b((v16i8) vec2, (v16i8) vec0);
|
||||
dst2 = (v16u8) __msa_ilvr_b((v16i8) vec3, (v16i8) vec1);
|
||||
dst3 = (v16u8) __msa_ilvl_b((v16i8) vec3, (v16i8) vec1);
|
||||
src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 16);
|
||||
vec0 = (v8u16)__msa_andi_b(src0, 0x0F);
|
||||
vec1 = (v8u16)__msa_andi_b(src1, 0x0F);
|
||||
vec2 = (v8u16)__msa_andi_b(src0, 0xF0);
|
||||
vec3 = (v8u16)__msa_andi_b(src1, 0xF0);
|
||||
vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4);
|
||||
vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4);
|
||||
vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4);
|
||||
vec3 |= (v8u16)__msa_srli_b((v16i8)vec3, 4);
|
||||
dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0);
|
||||
dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0);
|
||||
dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1);
|
||||
dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16);
|
||||
src_argb4444 += 32;
|
||||
dst_argb += 64;
|
||||
|
||||
@ -22,18 +22,18 @@ extern "C" {
|
||||
!defined(__aarch64__)
|
||||
|
||||
// Read 8 Y, 4 U and 4 V from 422
|
||||
#define READYUV422 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
#define READYUV422 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
MEMACCESS(1) \
|
||||
"vld1.32 {d2[0]}, [%1]! \n" \
|
||||
MEMACCESS(2) \
|
||||
"vld1.32 {d2[1]}, [%2]! \n"
|
||||
|
||||
// Read 8 Y, 8 U and 8 V from 444
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
MEMACCESS(1) \
|
||||
"vld1.8 {d2}, [%1]! \n" \
|
||||
MEMACCESS(2) \
|
||||
@ -42,15 +42,15 @@ extern "C" {
|
||||
"vrshrn.u16 d2, q1, #1 \n"
|
||||
|
||||
// Read 8 Y, and set 4 U and 4 V to 128
|
||||
#define READYUV400 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
"vmov.u8 d2, #128 \n"
|
||||
#define READYUV400 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
"vmov.u8 d2, #128 \n"
|
||||
|
||||
// Read 8 Y and 4 UV from NV12
|
||||
#define READNV12 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
#define READNV12 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
MEMACCESS(1) \
|
||||
"vld1.8 {d2}, [%1]! \n" \
|
||||
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
|
||||
@ -58,9 +58,9 @@ extern "C" {
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
// Read 8 Y and 4 VU from NV21
|
||||
#define READNV21 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
#define READNV21 \
|
||||
MEMACCESS(0) \
|
||||
"vld1.8 {d0}, [%0]! \n" \
|
||||
MEMACCESS(1) \
|
||||
"vld1.8 {d2}, [%1]! \n" \
|
||||
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
|
||||
@ -68,25 +68,25 @@ extern "C" {
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
// Read 8 YUY2
|
||||
#define READYUY2 \
|
||||
MEMACCESS(0) \
|
||||
"vld2.8 {d0, d2}, [%0]! \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
#define READYUY2 \
|
||||
MEMACCESS(0) \
|
||||
"vld2.8 {d0, d2}, [%0]! \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
// Read 8 UYVY
|
||||
#define READUYVY \
|
||||
MEMACCESS(0) \
|
||||
"vld2.8 {d2, d3}, [%0]! \n" \
|
||||
"vmov.u8 d0, d3 \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
#define READUYVY \
|
||||
MEMACCESS(0) \
|
||||
"vld2.8 {d2, d3}, [%0]! \n" \
|
||||
"vmov.u8 d0, d3 \n" \
|
||||
"vmov.u8 d3, d2 \n" \
|
||||
"vuzp.u8 d2, d3 \n" \
|
||||
"vtrn.u32 d2, d3 \n"
|
||||
|
||||
#define YUVTORGB_SETUP \
|
||||
MEMACCESS([kUVToRB]) \
|
||||
"vld1.8 {d24}, [%[kUVToRB]] \n" \
|
||||
#define YUVTORGB_SETUP \
|
||||
MEMACCESS([kUVToRB]) \
|
||||
"vld1.8 {d24}, [%[kUVToRB]] \n" \
|
||||
MEMACCESS([kUVToG]) \
|
||||
"vld1.8 {d25}, [%[kUVToG]] \n" \
|
||||
MEMACCESS([kUVBiasBGR]) \
|
||||
@ -98,32 +98,32 @@ extern "C" {
|
||||
MEMACCESS([kYToRgb]) \
|
||||
"vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
|
||||
|
||||
#define YUVTORGB \
|
||||
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
|
||||
"vmull.u8 q9, d2, d25 \n" /* u/v G component */\
|
||||
"vmovl.u8 q0, d0 \n" /* Y */\
|
||||
"vmovl.s16 q10, d1 \n" \
|
||||
"vmovl.s16 q0, d0 \n" \
|
||||
"vmul.s32 q10, q10, q15 \n" \
|
||||
"vmul.s32 q0, q0, q15 \n" \
|
||||
"vqshrun.s32 d0, q0, #16 \n" \
|
||||
"vqshrun.s32 d1, q10, #16 \n" /* Y */\
|
||||
"vadd.s16 d18, d19 \n" \
|
||||
"vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */\
|
||||
"vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */\
|
||||
"vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/\
|
||||
"vaddw.u16 q1, q1, d16 \n" \
|
||||
"vaddw.u16 q10, q10, d17 \n" \
|
||||
"vaddw.u16 q3, q3, d18 \n" \
|
||||
"vqadd.s16 q8, q0, q13 \n" /* B */ \
|
||||
"vqadd.s16 q9, q0, q14 \n" /* R */ \
|
||||
"vqadd.s16 q0, q0, q4 \n" /* G */ \
|
||||
"vqadd.s16 q8, q8, q1 \n" /* B */ \
|
||||
"vqadd.s16 q9, q9, q10 \n" /* R */ \
|
||||
"vqsub.s16 q0, q0, q3 \n" /* G */ \
|
||||
"vqshrun.s16 d20, q8, #6 \n" /* B */ \
|
||||
"vqshrun.s16 d22, q9, #6 \n" /* R */ \
|
||||
"vqshrun.s16 d21, q0, #6 \n" /* G */
|
||||
#define YUVTORGB \
|
||||
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */ \
|
||||
"vmull.u8 q9, d2, d25 \n" /* u/v G component */ \
|
||||
"vmovl.u8 q0, d0 \n" /* Y */ \
|
||||
"vmovl.s16 q10, d1 \n" \
|
||||
"vmovl.s16 q0, d0 \n" \
|
||||
"vmul.s32 q10, q10, q15 \n" \
|
||||
"vmul.s32 q0, q0, q15 \n" \
|
||||
"vqshrun.s32 d0, q0, #16 \n" \
|
||||
"vqshrun.s32 d1, q10, #16 \n" /* Y */ \
|
||||
"vadd.s16 d18, d19 \n" \
|
||||
"vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */ \
|
||||
"vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */ \
|
||||
"vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/ \
|
||||
"vaddw.u16 q1, q1, d16 \n" \
|
||||
"vaddw.u16 q10, q10, d17 \n" \
|
||||
"vaddw.u16 q3, q3, d18 \n" \
|
||||
"vqadd.s16 q8, q0, q13 \n" /* B */ \
|
||||
"vqadd.s16 q9, q0, q14 \n" /* R */ \
|
||||
"vqadd.s16 q0, q0, q4 \n" /* G */ \
|
||||
"vqadd.s16 q8, q8, q1 \n" /* B */ \
|
||||
"vqadd.s16 q9, q9, q10 \n" /* R */ \
|
||||
"vqsub.s16 q0, q0, q3 \n" /* G */ \
|
||||
"vqshrun.s16 d20, q8, #6 \n" /* B */ \
|
||||
"vqshrun.s16 d22, q9, #6 \n" /* R */ \
|
||||
"vqshrun.s16 d21, q0, #6 \n" /* G */
|
||||
|
||||
void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -277,12 +277,12 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTORGB565 \
|
||||
"vshll.u8 q0, d22, #8 \n" /* R */ \
|
||||
"vshll.u8 q8, d21, #8 \n" /* G */ \
|
||||
"vshll.u8 q9, d20, #8 \n" /* B */ \
|
||||
"vsri.16 q0, q8, #5 \n" /* RG */ \
|
||||
"vsri.16 q0, q9, #11 \n" /* RGB */
|
||||
#define ARGBTORGB565 \
|
||||
"vshll.u8 q0, d22, #8 \n" /* R */ \
|
||||
"vshll.u8 q8, d21, #8 \n" /* G */ \
|
||||
"vshll.u8 q9, d20, #8 \n" /* B */ \
|
||||
"vsri.16 q0, q8, #5 \n" /* RG */ \
|
||||
"vsri.16 q0, q9, #11 \n" /* RGB */
|
||||
|
||||
void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -314,14 +314,14 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTOARGB1555 \
|
||||
"vshll.u8 q0, d23, #8 \n" /* A */ \
|
||||
"vshll.u8 q8, d22, #8 \n" /* R */ \
|
||||
"vshll.u8 q9, d21, #8 \n" /* G */ \
|
||||
"vshll.u8 q10, d20, #8 \n" /* B */ \
|
||||
"vsri.16 q0, q8, #1 \n" /* AR */ \
|
||||
"vsri.16 q0, q9, #6 \n" /* ARG */ \
|
||||
"vsri.16 q0, q10, #11 \n" /* ARGB */
|
||||
#define ARGBTOARGB1555 \
|
||||
"vshll.u8 q0, d23, #8 \n" /* A */ \
|
||||
"vshll.u8 q8, d22, #8 \n" /* R */ \
|
||||
"vshll.u8 q9, d21, #8 \n" /* G */ \
|
||||
"vshll.u8 q10, d20, #8 \n" /* B */ \
|
||||
"vsri.16 q0, q8, #1 \n" /* AR */ \
|
||||
"vsri.16 q0, q9, #6 \n" /* ARG */ \
|
||||
"vsri.16 q0, q10, #11 \n" /* ARGB */
|
||||
|
||||
void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -354,14 +354,14 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTOARGB4444 \
|
||||
"vshr.u8 d20, d20, #4 \n" /* B */ \
|
||||
"vbic.32 d21, d21, d4 \n" /* G */ \
|
||||
"vshr.u8 d22, d22, #4 \n" /* R */ \
|
||||
"vbic.32 d23, d23, d4 \n" /* A */ \
|
||||
"vorr d0, d20, d21 \n" /* BG */ \
|
||||
"vorr d1, d22, d23 \n" /* RA */ \
|
||||
"vzip.u8 d0, d1 \n" /* BGRA */
|
||||
#define ARGBTOARGB4444 \
|
||||
"vshr.u8 d20, d20, #4 \n" /* B */ \
|
||||
"vbic.32 d21, d21, d4 \n" /* G */ \
|
||||
"vshr.u8 d22, d22, #4 \n" /* R */ \
|
||||
"vbic.32 d23, d23, d4 \n" /* A */ \
|
||||
"vorr d0, d20, d21 \n" /* BG */ \
|
||||
"vorr d1, d22, d23 \n" /* RA */ \
|
||||
"vzip.u8 d0, d1 \n" /* BGRA */
|
||||
|
||||
void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -395,9 +395,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"vmov.u8 d23, #255 \n"
|
||||
@ -420,9 +418,7 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void J400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d23, #255 \n"
|
||||
"1: \n"
|
||||
@ -579,7 +575,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
}
|
||||
|
||||
// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
|
||||
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void SplitUVRow_NEON(const uint8* src_uv,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -601,7 +599,9 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
}
|
||||
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
void MergeUVRow_NEON(const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_uv,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -698,7 +698,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void MirrorUVRow_NEON(const uint8* src_uv,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
@ -805,17 +807,17 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
#define RGB565TOARGB \
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
|
||||
"vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
#define RGB565TOARGB \
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \
|
||||
"vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
|
||||
void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
@ -836,34 +838,35 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGB1555TOARGB \
|
||||
"vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
|
||||
"vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
|
||||
"vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
|
||||
"vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
|
||||
"vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
|
||||
"vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
|
||||
"vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
|
||||
"vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
"vorr.u8 q1, q1, q3 \n" /* R,A */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,G */ \
|
||||
#define ARGB1555TOARGB \
|
||||
"vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
|
||||
"vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
|
||||
"vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
|
||||
"vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
|
||||
"vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
|
||||
"vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
|
||||
"vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
|
||||
"vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
"vorr.u8 q1, q1, q3 \n" /* R,A */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,G */
|
||||
|
||||
// RGB555TOARGB is the same as ARGB1555TOARGB but ignores alpha.
|
||||
#define RGB555TOARGB \
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
|
||||
"vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
|
||||
"vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
#define RGB555TOARGB \
|
||||
"vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \
|
||||
"vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \
|
||||
"vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \
|
||||
"vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \
|
||||
"vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \
|
||||
"vorr.u8 d0, d0, d4 \n" /* B */ \
|
||||
"vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \
|
||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||
|
||||
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
|
||||
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // Alpha
|
||||
@ -883,17 +886,18 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGB4444TOARGB \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
|
||||
"vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
|
||||
"vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
|
||||
"vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
|
||||
"vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
|
||||
"vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
|
||||
"vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
|
||||
#define ARGB4444TOARGB \
|
||||
"vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
|
||||
"vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
|
||||
"vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
|
||||
"vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
|
||||
"vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
|
||||
"vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
|
||||
"vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
|
||||
"vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
|
||||
|
||||
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
|
||||
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // Alpha
|
||||
@ -982,7 +986,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1003,7 +1009,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
void UYVYToUV422Row_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1024,8 +1032,11 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void YUY2ToUVRow_NEON(const uint8* src_yuy2,
|
||||
int stride_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // stride + src_yuy2
|
||||
"1: \n"
|
||||
@ -1051,8 +1062,11 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
);
|
||||
}
|
||||
|
||||
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void UYVYToUVRow_NEON(const uint8* src_uyvy,
|
||||
int stride_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // stride + src_uyvy
|
||||
"1: \n"
|
||||
@ -1079,8 +1093,10 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
}
|
||||
|
||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
const uint8* shuffler, int width) {
|
||||
void ARGBShuffleRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q2}, [%3] \n" // shuffler
|
||||
@ -1104,7 +1120,8 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_yuy2, int width) {
|
||||
uint8* dst_yuy2,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -1130,7 +1147,8 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
void I422ToUYVYRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_uyvy, int width) {
|
||||
uint8* dst_uyvy,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -1171,8 +1189,10 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int width) {
|
||||
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
const uint32 dither4,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vdup.32 d2, %2 \n" // dither4
|
||||
"1: \n"
|
||||
@ -1194,7 +1214,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
|
||||
void ARGBToARGB1555Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb1555,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1213,7 +1234,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
|
||||
void ARGBToARGB4444Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb4444,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
|
||||
@ -1302,7 +1324,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
|
||||
}
|
||||
|
||||
// 8x1 pixels.
|
||||
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
void ARGBToUV444Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient
|
||||
@ -1343,21 +1367,30 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"vmul.s16 q8, " #QB ", q10 \n" /* B */ \
|
||||
"vmls.s16 q8, " #QG ", q11 \n" /* G */ \
|
||||
"vmls.s16 q8, " #QR ", q12 \n" /* R */ \
|
||||
"vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
|
||||
"vmul.s16 q9, " #QR ", q10 \n" /* R */ \
|
||||
"vmls.s16 q9, " #QG ", q14 \n" /* G */ \
|
||||
"vmls.s16 q9, " #QB ", q13 \n" /* B */ \
|
||||
"vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
|
||||
"vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"vmul.s16 q8, " #QB \
|
||||
", q10 \n" /* B */ \
|
||||
"vmls.s16 q8, " #QG \
|
||||
", q11 \n" /* G */ \
|
||||
"vmls.s16 q8, " #QR \
|
||||
", q12 \n" /* R */ \
|
||||
"vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
|
||||
"vmul.s16 q9, " #QR \
|
||||
", q10 \n" /* R */ \
|
||||
"vmls.s16 q9, " #QG \
|
||||
", q14 \n" /* G */ \
|
||||
"vmls.s16 q9, " #QB \
|
||||
", q13 \n" /* B */ \
|
||||
"vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
|
||||
"vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */
|
||||
|
||||
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1405,8 +1438,11 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Subsample match C code.
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
|
||||
@ -1453,8 +1489,11 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_bgra
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1501,8 +1540,11 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
);
|
||||
}
|
||||
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_abgr
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1549,8 +1591,11 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
);
|
||||
}
|
||||
|
||||
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGBAToUVRow_NEON(const uint8* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_rgba
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1597,8 +1642,11 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
);
|
||||
}
|
||||
|
||||
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGB24ToUVRow_NEON(const uint8* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_rgb24
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1645,8 +1693,11 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
);
|
||||
}
|
||||
|
||||
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RAWToUVRow_NEON(const uint8* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_raw
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1694,8 +1745,11 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1763,8 +1817,11 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -1832,8 +1889,11 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
|
||||
int src_stride_argb4444,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"add %1, %0, %1 \n" // src_stride + src_argb
|
||||
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
|
||||
@ -2113,8 +2173,10 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
|
||||
|
||||
// Bilinear filter 16x2 -> 16x1
|
||||
void InterpolateRow_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
int dst_width, int source_y_fraction) {
|
||||
const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
int dst_width,
|
||||
int source_y_fraction) {
|
||||
int y1_fraction = source_y_fraction;
|
||||
asm volatile (
|
||||
"cmp %4, #0 \n"
|
||||
@ -2178,8 +2240,10 @@ void InterpolateRow_NEON(uint8* dst_ptr,
|
||||
}
|
||||
|
||||
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
||||
void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBBlendRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"subs %3, #8 \n"
|
||||
"blt 89f \n"
|
||||
@ -2269,8 +2333,11 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
|
||||
// Quantize 8 ARGB pixels (32 bytes).
|
||||
// dst = (dst * scale >> 16) * interval_size + interval_offset;
|
||||
void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
|
||||
int interval_offset, int width) {
|
||||
void ARGBQuantizeRow_NEON(uint8* dst_argb,
|
||||
int scale,
|
||||
int interval_size,
|
||||
int interval_offset,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vdup.u16 q8, %2 \n"
|
||||
"vshr.u16 q8, q8, #1 \n" // scale >>= 1
|
||||
@ -2312,7 +2379,9 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
|
||||
// Shade 8 pixels at a time by specified value.
|
||||
// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
|
||||
// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
|
||||
void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
|
||||
void ARGBShadeRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int width,
|
||||
uint32 value) {
|
||||
asm volatile (
|
||||
"vdup.u32 q0, %3 \n" // duplicate scale value.
|
||||
@ -2421,8 +2490,10 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
|
||||
// Transform 8 ARGB pixels (32 bytes) with color matrix.
|
||||
// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
|
||||
// needs to saturate. Consider doing a non-saturating version.
|
||||
void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
const int8* matrix_argb, int width) {
|
||||
void ARGBColorMatrixRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const int8* matrix_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors.
|
||||
@ -2482,8 +2553,10 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
}
|
||||
|
||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||
void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBMultiplyRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2514,8 +2587,10 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
}
|
||||
|
||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||
void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBAddRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2540,8 +2615,10 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
}
|
||||
|
||||
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
||||
void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBSubtractRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2570,8 +2647,10 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
// R = Sobel
|
||||
// G = Sobel
|
||||
// B = Sobel
|
||||
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
void SobelRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
@ -2597,8 +2676,10 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
}
|
||||
|
||||
// Adds Sobel X and Sobel Y and stores Sobel into plane.
|
||||
void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_y, int width) {
|
||||
void SobelToPlaneRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_y,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 16 pixel loop.
|
||||
"1: \n"
|
||||
@ -2625,8 +2706,10 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
// R = Sobel X
|
||||
// G = Sobel
|
||||
// B = Sobel Y
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d3, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
@ -2653,8 +2736,11 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
// -1 0 1
|
||||
// -2 0 2
|
||||
// -1 0 1
|
||||
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
const uint8* src_y2, uint8* dst_sobelx, int width) {
|
||||
void SobelXRow_NEON(const uint8* src_y0,
|
||||
const uint8* src_y1,
|
||||
const uint8* src_y2,
|
||||
uint8* dst_sobelx,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -2696,8 +2782,10 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
// -1 -2 -1
|
||||
// 0 0 0
|
||||
// 1 2 1
|
||||
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
uint8* dst_sobely, int width) {
|
||||
void SobelYRow_NEON(const uint8* src_y0,
|
||||
const uint8* src_y1,
|
||||
uint8* dst_sobely,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
|
||||
@ -19,18 +19,18 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
// Read 8 Y, 4 U and 4 V from 422
|
||||
#define READYUV422 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
#define READYUV422 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
MEMACCESS(1) \
|
||||
"ld1 {v1.s}[0], [%1], #4 \n" \
|
||||
MEMACCESS(2) \
|
||||
"ld1 {v1.s}[1], [%2], #4 \n"
|
||||
|
||||
// Read 8 Y, 8 U and 8 V from 444
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
#define READYUV444 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
MEMACCESS(1) \
|
||||
"ld1 {v1.d}[0], [%1], #8 \n" \
|
||||
MEMACCESS(2) \
|
||||
@ -39,15 +39,15 @@ extern "C" {
|
||||
"rshrn v1.8b, v1.8h, #1 \n"
|
||||
|
||||
// Read 8 Y, and set 4 U and 4 V to 128
|
||||
#define READYUV400 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
"movi v1.8b , #128 \n"
|
||||
#define READYUV400 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
"movi v1.8b , #128 \n"
|
||||
|
||||
// Read 8 Y and 4 UV from NV12
|
||||
#define READNV12 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
#define READNV12 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
MEMACCESS(1) \
|
||||
"ld1 {v2.8b}, [%1], #8 \n" \
|
||||
"uzp1 v1.8b, v2.8b, v2.8b \n" \
|
||||
@ -55,9 +55,9 @@ extern "C" {
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
|
||||
// Read 8 Y and 4 VU from NV21
|
||||
#define READNV21 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
#define READNV21 \
|
||||
MEMACCESS(0) \
|
||||
"ld1 {v0.8b}, [%0], #8 \n" \
|
||||
MEMACCESS(1) \
|
||||
"ld1 {v2.8b}, [%1], #8 \n" \
|
||||
"uzp1 v3.8b, v2.8b, v2.8b \n" \
|
||||
@ -65,57 +65,65 @@ extern "C" {
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
|
||||
// Read 8 YUY2
|
||||
#define READYUY2 \
|
||||
MEMACCESS(0) \
|
||||
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" \
|
||||
"uzp2 v3.8b, v1.8b, v1.8b \n" \
|
||||
"uzp1 v1.8b, v1.8b, v1.8b \n" \
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
#define READYUY2 \
|
||||
MEMACCESS(0) \
|
||||
"ld2 {v0.8b, v1.8b}, [%0], #16 \n" \
|
||||
"uzp2 v3.8b, v1.8b, v1.8b \n" \
|
||||
"uzp1 v1.8b, v1.8b, v1.8b \n" \
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
|
||||
// Read 8 UYVY
|
||||
#define READUYVY \
|
||||
MEMACCESS(0) \
|
||||
"ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
|
||||
"orr v0.8b, v3.8b, v3.8b \n" \
|
||||
"uzp1 v1.8b, v2.8b, v2.8b \n" \
|
||||
"uzp2 v3.8b, v2.8b, v2.8b \n" \
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
#define READUYVY \
|
||||
MEMACCESS(0) \
|
||||
"ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
|
||||
"orr v0.8b, v3.8b, v3.8b \n" \
|
||||
"uzp1 v1.8b, v2.8b, v2.8b \n" \
|
||||
"uzp2 v3.8b, v2.8b, v2.8b \n" \
|
||||
"ins v1.s[1], v3.s[0] \n"
|
||||
|
||||
#define YUVTORGB_SETUP \
|
||||
"ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
|
||||
"ld1r {v31.4s}, [%[kYToRgb]] \n" \
|
||||
"ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \
|
||||
"ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n"
|
||||
#define YUVTORGB_SETUP \
|
||||
"ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
|
||||
"ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
|
||||
"ld1r {v31.4s}, [%[kYToRgb]] \n" \
|
||||
"ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \
|
||||
"ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n"
|
||||
|
||||
#define YUVTORGB(vR, vG, vB) \
|
||||
"uxtl v0.8h, v0.8b \n" /* Extract Y */ \
|
||||
"shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
|
||||
"ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
|
||||
"ushll v0.4s, v0.4h, #0 \n" \
|
||||
"mul v3.4s, v3.4s, v31.4s \n" \
|
||||
"mul v0.4s, v0.4s, v31.4s \n" \
|
||||
"sqshrun v0.4h, v0.4s, #16 \n" \
|
||||
"sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
|
||||
"uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
|
||||
"mov v2.d[0], v1.d[1] \n" /* Extract V */ \
|
||||
"uxtl v2.8h, v2.8b \n" \
|
||||
"uxtl v1.8h, v1.8b \n" /* Extract U */ \
|
||||
"mul v3.8h, v1.8h, v27.8h \n" \
|
||||
"mul v5.8h, v1.8h, v29.8h \n" \
|
||||
"mul v6.8h, v2.8h, v30.8h \n" \
|
||||
"mul v7.8h, v2.8h, v28.8h \n" \
|
||||
"sqadd v6.8h, v6.8h, v5.8h \n" \
|
||||
"sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \
|
||||
"sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \
|
||||
"sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \
|
||||
"sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \
|
||||
"sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \
|
||||
"sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \
|
||||
"sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \
|
||||
"sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
|
||||
"sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
|
||||
#define YUVTORGB(vR, vG, vB) \
|
||||
"uxtl v0.8h, v0.8b \n" /* Extract Y */ \
|
||||
"shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
|
||||
"ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
|
||||
"ushll v0.4s, v0.4h, #0 \n" \
|
||||
"mul v3.4s, v3.4s, v31.4s \n" \
|
||||
"mul v0.4s, v0.4s, v31.4s \n" \
|
||||
"sqshrun v0.4h, v0.4s, #16 \n" \
|
||||
"sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
|
||||
"uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
|
||||
"mov v2.d[0], v1.d[1] \n" /* Extract V */ \
|
||||
"uxtl v2.8h, v2.8b \n" \
|
||||
"uxtl v1.8h, v1.8b \n" /* Extract U */ \
|
||||
"mul v3.8h, v1.8h, v27.8h \n" \
|
||||
"mul v5.8h, v1.8h, v29.8h \n" \
|
||||
"mul v6.8h, v2.8h, v30.8h \n" \
|
||||
"mul v7.8h, v2.8h, v28.8h \n" \
|
||||
"sqadd v6.8h, v6.8h, v5.8h \n" \
|
||||
"sqadd " #vB \
|
||||
".8h, v24.8h, v0.8h \n" /* B */ \
|
||||
"sqadd " #vG \
|
||||
".8h, v25.8h, v0.8h \n" /* G */ \
|
||||
"sqadd " #vR \
|
||||
".8h, v26.8h, v0.8h \n" /* R */ \
|
||||
"sqadd " #vB ".8h, " #vB \
|
||||
".8h, v3.8h \n" /* B */ \
|
||||
"sqsub " #vG ".8h, " #vG \
|
||||
".8h, v6.8h \n" /* G */ \
|
||||
"sqadd " #vR ".8h, " #vR \
|
||||
".8h, v7.8h \n" /* R */ \
|
||||
"sqshrun " #vB ".8b, " #vB \
|
||||
".8h, #6 \n" /* B */ \
|
||||
"sqshrun " #vG ".8b, " #vG \
|
||||
".8h, #6 \n" /* G */ \
|
||||
"sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */
|
||||
|
||||
void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -269,12 +277,12 @@ void I422ToRGB24Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTORGB565 \
|
||||
"shll v0.8h, v22.8b, #8 \n" /* R */ \
|
||||
"shll v21.8h, v21.8b, #8 \n" /* G */ \
|
||||
"shll v20.8h, v20.8b, #8 \n" /* B */ \
|
||||
"sri v0.8h, v21.8h, #5 \n" /* RG */ \
|
||||
"sri v0.8h, v20.8h, #11 \n" /* RGB */
|
||||
#define ARGBTORGB565 \
|
||||
"shll v0.8h, v22.8b, #8 \n" /* R */ \
|
||||
"shll v21.8h, v21.8b, #8 \n" /* G */ \
|
||||
"shll v20.8h, v20.8b, #8 \n" /* B */ \
|
||||
"sri v0.8h, v21.8h, #5 \n" /* RG */ \
|
||||
"sri v0.8h, v20.8h, #11 \n" /* RGB */
|
||||
|
||||
void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -306,14 +314,14 @@ void I422ToRGB565Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTOARGB1555 \
|
||||
"shll v0.8h, v23.8b, #8 \n" /* A */ \
|
||||
"shll v22.8h, v22.8b, #8 \n" /* R */ \
|
||||
"shll v21.8h, v21.8b, #8 \n" /* G */ \
|
||||
"shll v20.8h, v20.8b, #8 \n" /* B */ \
|
||||
"sri v0.8h, v22.8h, #1 \n" /* AR */ \
|
||||
"sri v0.8h, v21.8h, #6 \n" /* ARG */ \
|
||||
"sri v0.8h, v20.8h, #11 \n" /* ARGB */
|
||||
#define ARGBTOARGB1555 \
|
||||
"shll v0.8h, v23.8b, #8 \n" /* A */ \
|
||||
"shll v22.8h, v22.8b, #8 \n" /* R */ \
|
||||
"shll v21.8h, v21.8b, #8 \n" /* G */ \
|
||||
"shll v20.8h, v20.8b, #8 \n" /* B */ \
|
||||
"sri v0.8h, v22.8h, #1 \n" /* AR */ \
|
||||
"sri v0.8h, v21.8h, #6 \n" /* ARG */ \
|
||||
"sri v0.8h, v20.8h, #11 \n" /* ARGB */
|
||||
|
||||
void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -346,15 +354,15 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGBTOARGB4444 \
|
||||
/* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
|
||||
"ushr v20.8b, v20.8b, #4 \n" /* B */ \
|
||||
"bic v21.8b, v21.8b, v4.8b \n" /* G */ \
|
||||
"ushr v22.8b, v22.8b, #4 \n" /* R */ \
|
||||
"bic v23.8b, v23.8b, v4.8b \n" /* A */ \
|
||||
"orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
|
||||
"orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
|
||||
"zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
|
||||
#define ARGBTOARGB4444 \
|
||||
/* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
|
||||
"ushr v20.8b, v20.8b, #4 \n" /* B */ \
|
||||
"bic v21.8b, v21.8b, v4.8b \n" /* G */ \
|
||||
"ushr v22.8b, v22.8b, #4 \n" /* R */ \
|
||||
"bic v23.8b, v23.8b, v4.8b \n" /* A */ \
|
||||
"orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
|
||||
"orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
|
||||
"zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
|
||||
|
||||
void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
@ -388,9 +396,7 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"movi v23.8b, #255 \n"
|
||||
@ -413,9 +419,7 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
);
|
||||
}
|
||||
|
||||
void J400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
"movi v23.8b, #255 \n"
|
||||
"1: \n"
|
||||
@ -572,7 +576,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
|
||||
}
|
||||
|
||||
// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
|
||||
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void SplitUVRow_NEON(const uint8* src_uv,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -594,7 +600,9 @@ void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
}
|
||||
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
void MergeUVRow_NEON(const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_uv,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -688,7 +696,9 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
void MirrorUVRow_NEON(const uint8* src_uv,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// Start at end of source row.
|
||||
@ -794,18 +804,18 @@ void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
#define RGB565TOARGB \
|
||||
"shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \
|
||||
"shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"orr v1.8b, v4.8b, v6.8b \n" /* G */ \
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \
|
||||
"xtn2 v2.16b,v0.8h \n" /* R in upper part */ \
|
||||
"shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \
|
||||
"ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
|
||||
"dup v2.2D, v0.D[1] \n" /* R */
|
||||
#define RGB565TOARGB \
|
||||
"shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \
|
||||
"shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \
|
||||
"ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \
|
||||
"orr v1.8b, v4.8b, v6.8b \n" /* G */ \
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \
|
||||
"xtn2 v2.16b,v0.8h \n" /* R in upper part */ \
|
||||
"shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \
|
||||
"ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
|
||||
"dup v2.2D, v0.D[1] \n" /* R */
|
||||
|
||||
void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
|
||||
asm volatile (
|
||||
@ -826,44 +836,45 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGB1555TOARGB \
|
||||
"ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
|
||||
"shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \
|
||||
\
|
||||
"sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \
|
||||
"xtn2 v3.16b, v2.8h \n" \
|
||||
\
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
|
||||
\
|
||||
"ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \
|
||||
"shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
\
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
|
||||
"orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \
|
||||
"dup v1.2D, v0.D[1] \n" \
|
||||
"dup v3.2D, v2.D[1] \n"
|
||||
#define ARGB1555TOARGB \
|
||||
"ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
|
||||
"shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \
|
||||
\
|
||||
"sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \
|
||||
"xtn2 v3.16b, v2.8h \n" \
|
||||
\
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
|
||||
\
|
||||
"ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \
|
||||
"shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
\
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
|
||||
"orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \
|
||||
"dup v1.2D, v0.D[1] \n" \
|
||||
"dup v3.2D, v2.D[1] \n"
|
||||
|
||||
// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
|
||||
#define RGB555TOARGB \
|
||||
"ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
|
||||
"shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"xtn v3.8b, v2.8h \n" /* RRRRR000 */ \
|
||||
\
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
|
||||
\
|
||||
"ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \
|
||||
"shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
\
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
|
||||
"orr v2.16b, v1.16b, v3.16b \n" /* R */ \
|
||||
"dup v1.2D, v0.D[1] \n" /* G */ \
|
||||
#define RGB555TOARGB \
|
||||
"ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \
|
||||
"shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \
|
||||
"xtn v3.8b, v2.8h \n" /* RRRRR000 */ \
|
||||
\
|
||||
"xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \
|
||||
"shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \
|
||||
\
|
||||
"ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \
|
||||
"shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \
|
||||
"ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \
|
||||
\
|
||||
"orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \
|
||||
"orr v2.16b, v1.16b, v3.16b \n" /* R */ \
|
||||
"dup v1.2D, v0.D[1] \n" /* G */
|
||||
|
||||
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
|
||||
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movi v3.8b, #255 \n" // Alpha
|
||||
@ -883,19 +894,20 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
|
||||
);
|
||||
}
|
||||
|
||||
#define ARGB4444TOARGB \
|
||||
"shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \
|
||||
"xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \
|
||||
"shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \
|
||||
"ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \
|
||||
"ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \
|
||||
"shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \
|
||||
"orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \
|
||||
"orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \
|
||||
"dup v0.2D, v2.D[1] \n" \
|
||||
"dup v1.2D, v3.D[1] \n"
|
||||
#define ARGB4444TOARGB \
|
||||
"shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \
|
||||
"xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \
|
||||
"shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \
|
||||
"ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \
|
||||
"ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \
|
||||
"shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \
|
||||
"orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \
|
||||
"orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \
|
||||
"dup v0.2D, v2.D[1] \n" \
|
||||
"dup v1.2D, v3.D[1] \n"
|
||||
|
||||
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
|
||||
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -984,7 +996,9 @@ void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1005,7 +1019,9 @@ void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
void UYVYToUV422Row_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1026,8 +1042,11 @@ void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void YUY2ToUVRow_NEON(const uint8* src_yuy2,
|
||||
int stride_yuy2,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_yuy2b = src_yuy2 + stride_yuy2;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1054,8 +1073,11 @@ void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
|
||||
);
|
||||
}
|
||||
|
||||
void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void UYVYToUVRow_NEON(const uint8* src_uyvy,
|
||||
int stride_uyvy,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_uyvyb = src_uyvy + stride_uyvy;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1083,8 +1105,10 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
}
|
||||
|
||||
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
||||
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
const uint8* shuffler, int width) {
|
||||
void ARGBShuffleRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const uint8* shuffler,
|
||||
int width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"ld1 {v2.16b}, [%3] \n" // shuffler
|
||||
@ -1107,7 +1131,8 @@ void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_yuy2, int width) {
|
||||
uint8* dst_yuy2,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -1134,7 +1159,8 @@ void I422ToYUY2Row_NEON(const uint8* src_y,
|
||||
void I422ToUYVYRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_uyvy, int width) {
|
||||
uint8* dst_uyvy,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -1176,8 +1202,10 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) {
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
|
||||
const uint32 dither4, int width) {
|
||||
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_rgb,
|
||||
const uint32 dither4,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"dup v1.4s, %w2 \n" // dither4
|
||||
"1: \n"
|
||||
@ -1199,7 +1227,8 @@ void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
|
||||
void ARGBToARGB1555Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb1555,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -1218,7 +1247,8 @@ void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
|
||||
);
|
||||
}
|
||||
|
||||
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
|
||||
void ARGBToARGB4444Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb4444,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
|
||||
@ -1306,7 +1336,9 @@ void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) {
|
||||
}
|
||||
|
||||
// 8x1 pixels.
|
||||
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
void ARGBToUV444Row_NEON(const uint8* src_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movi v24.8b, #112 \n" // UB / VR 0.875 coefficient
|
||||
@ -1347,32 +1379,41 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
|
||||
);
|
||||
}
|
||||
|
||||
#define RGBTOUV_SETUP_REG \
|
||||
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
|
||||
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
|
||||
"movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
|
||||
"movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
|
||||
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
|
||||
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
|
||||
#define RGBTOUV_SETUP_REG \
|
||||
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
|
||||
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
|
||||
"movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
|
||||
"movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
|
||||
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
|
||||
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"mul v3.8h, " #QB ",v20.8h \n" /* B */ \
|
||||
"mul v4.8h, " #QR ",v20.8h \n" /* R */ \
|
||||
"mls v3.8h, " #QG ",v21.8h \n" /* G */ \
|
||||
"mls v4.8h, " #QG ",v24.8h \n" /* G */ \
|
||||
"mls v3.8h, " #QR ",v22.8h \n" /* R */ \
|
||||
"mls v4.8h, " #QB ",v23.8h \n" /* B */ \
|
||||
"add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
|
||||
"add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
|
||||
"uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */
|
||||
#define RGBTOUV(QB, QG, QR) \
|
||||
"mul v3.8h, " #QB \
|
||||
",v20.8h \n" /* B */ \
|
||||
"mul v4.8h, " #QR \
|
||||
",v20.8h \n" /* R */ \
|
||||
"mls v3.8h, " #QG \
|
||||
",v21.8h \n" /* G */ \
|
||||
"mls v4.8h, " #QG \
|
||||
",v24.8h \n" /* G */ \
|
||||
"mls v3.8h, " #QR \
|
||||
",v22.8h \n" /* R */ \
|
||||
"mls v4.8h, " #QB \
|
||||
",v23.8h \n" /* B */ \
|
||||
"add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
|
||||
"add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
|
||||
"uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \
|
||||
"uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */
|
||||
|
||||
// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
|
||||
// TODO(fbarchard): consider ptrdiff_t for all strides.
|
||||
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGBToUVRow_NEON(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_argb_1 = src_argb + src_stride_argb;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1412,8 +1453,11 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Subsample match C code.
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGBToUVJRow_NEON(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_argb_1 = src_argb + src_stride_argb;
|
||||
asm volatile (
|
||||
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
|
||||
@ -1456,8 +1500,11 @@ void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
|
||||
);
|
||||
}
|
||||
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void BGRAToUVRow_NEON(const uint8* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_bgra_1 = src_bgra + src_stride_bgra;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1495,8 +1542,11 @@ void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
|
||||
);
|
||||
}
|
||||
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ABGRToUVRow_NEON(const uint8* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_abgr_1 = src_abgr + src_stride_abgr;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1534,8 +1584,11 @@ void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
|
||||
);
|
||||
}
|
||||
|
||||
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGBAToUVRow_NEON(const uint8* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_rgba_1 = src_rgba + src_stride_rgba;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1573,8 +1626,11 @@ void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
|
||||
);
|
||||
}
|
||||
|
||||
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGB24ToUVRow_NEON(const uint8* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1612,8 +1668,11 @@ void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
);
|
||||
}
|
||||
|
||||
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RAWToUVRow_NEON(const uint8* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_raw_1 = src_raw + src_stride_raw;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1652,8 +1711,11 @@ void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void RGB565ToUVRow_NEON(const uint8* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565;
|
||||
asm volatile (
|
||||
"movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / 2
|
||||
@ -1726,8 +1788,11 @@ void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -1795,8 +1860,11 @@ void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
}
|
||||
|
||||
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
|
||||
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_u, uint8* dst_v, int width) {
|
||||
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
|
||||
int src_stride_argb4444,
|
||||
uint8* dst_u,
|
||||
uint8* dst_v,
|
||||
int width) {
|
||||
const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444;
|
||||
asm volatile (
|
||||
RGBTOUV_SETUP_REG
|
||||
@ -2078,8 +2146,10 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
|
||||
|
||||
// Bilinear filter 16x2 -> 16x1
|
||||
void InterpolateRow_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
int dst_width, int source_y_fraction) {
|
||||
const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
int dst_width,
|
||||
int source_y_fraction) {
|
||||
int y1_fraction = source_y_fraction;
|
||||
int y0_fraction = 256 - y1_fraction;
|
||||
const uint8* src_ptr1 = src_ptr + src_stride;
|
||||
@ -2144,8 +2214,10 @@ void InterpolateRow_NEON(uint8* dst_ptr,
|
||||
}
|
||||
|
||||
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
|
||||
void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBBlendRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"subs %w3, %w3, #8 \n"
|
||||
"b.lt 89f \n"
|
||||
@ -2240,8 +2312,11 @@ void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
|
||||
|
||||
// Quantize 8 ARGB pixels (32 bytes).
|
||||
// dst = (dst * scale >> 16) * interval_size + interval_offset;
|
||||
void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
|
||||
int interval_offset, int width) {
|
||||
void ARGBQuantizeRow_NEON(uint8* dst_argb,
|
||||
int scale,
|
||||
int interval_size,
|
||||
int interval_offset,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"dup v4.8h, %w2 \n"
|
||||
"ushr v4.8h, v4.8h, #1 \n" // scale >>= 1
|
||||
@ -2283,7 +2358,9 @@ void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
|
||||
// Shade 8 pixels at a time by specified value.
|
||||
// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8.
|
||||
// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
|
||||
void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
|
||||
void ARGBShadeRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int width,
|
||||
uint32 value) {
|
||||
asm volatile (
|
||||
"dup v0.4s, %w3 \n" // duplicate scale value.
|
||||
@ -2393,8 +2470,10 @@ void ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
|
||||
// Tranform 8 ARGB pixels (32 bytes) with color matrix.
|
||||
// TODO(fbarchard): Was same as Sepia except matrix is provided. This function
|
||||
// needs to saturate. Consider doing a non-saturating version.
|
||||
void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
const int8* matrix_argb, int width) {
|
||||
void ARGBColorMatrixRow_NEON(const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
const int8* matrix_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors.
|
||||
@ -2455,8 +2534,10 @@ void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
|
||||
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||
void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBMultiplyRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2487,8 +2568,10 @@ void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
}
|
||||
|
||||
// Add 2 rows of ARGB pixels together, 8 pixels at a time.
|
||||
void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBAddRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2515,8 +2598,10 @@ void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
}
|
||||
|
||||
// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
|
||||
void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
uint8* dst_argb, int width) {
|
||||
void ARGBSubtractRow_NEON(const uint8* src_argb0,
|
||||
const uint8* src_argb1,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 8 pixel loop.
|
||||
"1: \n"
|
||||
@ -2547,8 +2632,10 @@ void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
|
||||
// R = Sobel
|
||||
// G = Sobel
|
||||
// B = Sobel
|
||||
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
void SobelRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
@ -2574,8 +2661,10 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
}
|
||||
|
||||
// Adds Sobel X and Sobel Y and stores Sobel into plane.
|
||||
void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_y, int width) {
|
||||
void SobelToPlaneRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_y,
|
||||
int width) {
|
||||
asm volatile (
|
||||
// 16 pixel loop.
|
||||
"1: \n"
|
||||
@ -2602,8 +2691,10 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
// R = Sobel X
|
||||
// G = Sobel
|
||||
// B = Sobel Y
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
uint8* dst_argb, int width) {
|
||||
void SobelXYRow_NEON(const uint8* src_sobelx,
|
||||
const uint8* src_sobely,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"movi v3.8b, #255 \n" // alpha
|
||||
// 8 pixel loop.
|
||||
@ -2630,8 +2721,11 @@ void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
|
||||
// -1 0 1
|
||||
// -2 0 2
|
||||
// -1 0 1
|
||||
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
const uint8* src_y2, uint8* dst_sobelx, int width) {
|
||||
void SobelXRow_NEON(const uint8* src_y0,
|
||||
const uint8* src_y1,
|
||||
const uint8* src_y2,
|
||||
uint8* dst_sobelx,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -2673,8 +2767,10 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
// -1 -2 -1
|
||||
// 0 0 0
|
||||
// 1 2 1
|
||||
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
||||
uint8* dst_sobely, int width) {
|
||||
void SobelYRow_NEON(const uint8* src_y0,
|
||||
const uint8* src_y1,
|
||||
uint8* dst_sobely,
|
||||
int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
|
||||
3495
source/row_win.cc
3495
source/row_win.cc
File diff suppressed because it is too large
Load Diff
686
source/scale.cc
686
source/scale.cc
File diff suppressed because it is too large
Load Diff
@ -19,16 +19,15 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
|
||||
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
|
||||
int dst_width, int x, int dx) { \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
|
||||
} \
|
||||
TERP_C(dst_ptr + n * BPP, src_ptr, \
|
||||
dst_width & MASK, x + n * dx, dx); \
|
||||
}
|
||||
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, \
|
||||
int dx) { \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
|
||||
} \
|
||||
TERP_C(dst_ptr + n * BPP, src_ptr, dst_width & MASK, x + n * dx, dx); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEFILTERCOLS_NEON
|
||||
CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
|
||||
@ -37,167 +36,302 @@ CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
|
||||
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
|
||||
CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
|
||||
ScaleARGBFilterCols_C, 4, 3)
|
||||
CANY(ScaleARGBFilterCols_Any_NEON,
|
||||
ScaleARGBFilterCols_NEON,
|
||||
ScaleARGBFilterCols_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#undef CANY
|
||||
|
||||
// Fixed scale down.
|
||||
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \
|
||||
int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
|
||||
// Fixed scale down for odd source width. Used by I420Blend subsampling.
|
||||
// Since dst_width is (width + 1) / 2, this function scales one less pixel
|
||||
// and copies the last pixel.
|
||||
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \
|
||||
int dst_width) { \
|
||||
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEROWDOWN2_SSSE3
|
||||
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
|
||||
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
|
||||
2, 1, 15)
|
||||
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_Odd_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_SSSE3,
|
||||
ScaleRowDown2Linear_SSSE3,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDANY(ScaleRowDown2Box_Any_SSSE3,
|
||||
ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDODD(ScaleRowDown2Box_Odd_SSSE3,
|
||||
ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_AVX2
|
||||
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
|
||||
SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
|
||||
ScaleRowDown2Linear_C, 2, 1, 31)
|
||||
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
|
||||
2, 1, 31)
|
||||
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
|
||||
2, 1, 31)
|
||||
SDANY(ScaleRowDown2Linear_Any_AVX2,
|
||||
ScaleRowDown2Linear_AVX2,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
SDANY(ScaleRowDown2Box_Any_AVX2,
|
||||
ScaleRowDown2Box_AVX2,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
SDODD(ScaleRowDown2Box_Odd_AVX2,
|
||||
ScaleRowDown2Box_AVX2,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_NEON
|
||||
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
|
||||
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_C, 2, 1, 15)
|
||||
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_Odd_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_NEON,
|
||||
ScaleRowDown2Linear_NEON,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDANY(ScaleRowDown2Box_Any_NEON,
|
||||
ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDODD(ScaleRowDown2Box_Odd_NEON,
|
||||
ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_SSSE3
|
||||
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
|
||||
4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_SSSE3,
|
||||
ScaleRowDown4Box_SSSE3,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_AVX2
|
||||
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
|
||||
SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
|
||||
4, 1, 15)
|
||||
SDANY(ScaleRowDown4Box_Any_AVX2,
|
||||
ScaleRowDown4Box_AVX2,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_NEON
|
||||
SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
|
||||
4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_NEON,
|
||||
ScaleRowDown4Box_NEON,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN34_SSSE3
|
||||
SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3,
|
||||
ScaleRowDown34_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
|
||||
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
|
||||
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_Any_SSSE3,
|
||||
ScaleRowDown34_SSSE3,
|
||||
ScaleRowDown34_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
|
||||
ScaleRowDown34_0_Box_SSSE3,
|
||||
ScaleRowDown34_0_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
|
||||
ScaleRowDown34_1_Box_SSSE3,
|
||||
ScaleRowDown34_1_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN34_NEON
|
||||
SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON,
|
||||
ScaleRowDown34_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
|
||||
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
|
||||
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_Any_NEON,
|
||||
ScaleRowDown34_NEON,
|
||||
ScaleRowDown34_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_NEON,
|
||||
ScaleRowDown34_0_Box_NEON,
|
||||
ScaleRowDown34_0_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_NEON,
|
||||
ScaleRowDown34_1_Box_NEON,
|
||||
ScaleRowDown34_1_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN38_SSSE3
|
||||
SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3,
|
||||
ScaleRowDown38_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
|
||||
ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
|
||||
ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
|
||||
SDANY(ScaleRowDown38_Any_SSSE3,
|
||||
ScaleRowDown38_SSSE3,
|
||||
ScaleRowDown38_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
|
||||
ScaleRowDown38_3_Box_SSSE3,
|
||||
ScaleRowDown38_3_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
5)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
|
||||
ScaleRowDown38_2_Box_SSSE3,
|
||||
ScaleRowDown38_2_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
5)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN38_NEON
|
||||
SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON,
|
||||
ScaleRowDown38_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
|
||||
ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
|
||||
ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_Any_NEON,
|
||||
ScaleRowDown38_NEON,
|
||||
ScaleRowDown38_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_NEON,
|
||||
ScaleRowDown38_3_Box_NEON,
|
||||
ScaleRowDown38_3_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_NEON,
|
||||
ScaleRowDown38_2_Box_NEON,
|
||||
ScaleRowDown38_2_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_SSE2
|
||||
SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
|
||||
ScaleARGBRowDown2_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
|
||||
ScaleARGBRowDown2Linear_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
|
||||
ScaleARGBRowDown2Box_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2_Any_SSE2,
|
||||
ScaleARGBRowDown2_SSE2,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
|
||||
ScaleARGBRowDown2Linear_SSE2,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_SSE2,
|
||||
ScaleARGBRowDown2Box_SSE2,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_NEON
|
||||
SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
|
||||
ScaleARGBRowDown2_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
|
||||
ScaleARGBRowDown2Linear_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
|
||||
ScaleARGBRowDown2Box_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2_Any_NEON,
|
||||
ScaleARGBRowDown2_NEON,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_NEON,
|
||||
ScaleARGBRowDown2Linear_NEON,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_NEON,
|
||||
ScaleARGBRowDown2Box_NEON,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
#undef SDANY
|
||||
|
||||
// Scale down by even scale factor.
|
||||
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \
|
||||
src_stepx, dst_ptr + n * BPP, r); \
|
||||
}
|
||||
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
|
||||
ScaleARGBRowDownEven_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
|
||||
ScaleARGBRowDownEvenBox_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEven_Any_SSE2,
|
||||
ScaleARGBRowDownEven_SSE2,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
|
||||
ScaleARGBRowDownEvenBox_SSE2,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
|
||||
ScaleARGBRowDownEven_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
|
||||
ScaleARGBRowDownEvenBox_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEven_Any_NEON,
|
||||
ScaleARGBRowDownEven_NEON,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
|
||||
ScaleARGBRowDownEvenBox_NEON,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
|
||||
// Add rows box filter scale down.
|
||||
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
|
||||
int n = src_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
|
||||
}
|
||||
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
|
||||
int n = src_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEADDROW_SSE2
|
||||
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
|
||||
@ -214,8 +348,3 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -30,20 +30,28 @@ static __inline int Abs(int v) {
|
||||
// ScaleARGB ARGB, 1/2
|
||||
// This is an optimized version for scaling down an ARGB image to 1/2 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown2(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBDown2(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) =
|
||||
filtering == kFilterNone ? ScaleARGBRowDown2_C :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
|
||||
ScaleARGBRowDown2Box_C);
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_C
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
|
||||
: ScaleARGBRowDown2Box_C);
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row, even column.
|
||||
if (filtering == kFilterBilinear) {
|
||||
@ -54,25 +62,33 @@ static void ScaleARGBDown2(int src_width, int src_height,
|
||||
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
|
||||
ScaleARGBRowDown2Box_Any_SSE2);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_SSE2
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
|
||||
: ScaleARGBRowDown2Box_Any_SSE2);
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
|
||||
ScaleARGBRowDown2Box_SSE2);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_SSE2
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
|
||||
: ScaleARGBRowDown2Box_SSE2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
|
||||
ScaleARGBRowDown2Box_Any_NEON);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_NEON
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
|
||||
: ScaleARGBRowDown2Box_Any_NEON);
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
|
||||
ScaleARGBRowDown2Box_NEON);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_NEON
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
|
||||
: ScaleARGBRowDown2Box_NEON);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -90,21 +106,29 @@ static void ScaleARGBDown2(int src_width, int src_height,
|
||||
// ScaleARGB ARGB, 1/4
|
||||
// This is an optimized version for scaling down an ARGB image to 1/4 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
static void ScaleARGBDown4Box(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
|
||||
uint8* dst_argb, int dst_width) =
|
||||
ScaleARGBRowDown2Box_C;
|
||||
// Advance to odd row, even column.
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
@ -125,8 +149,8 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
|
||||
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
|
||||
row + kRowSize, dst_width * 2);
|
||||
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
|
||||
dst_width * 2);
|
||||
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
dst_argb += dst_stride;
|
||||
@ -137,11 +161,18 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
// ScaleARGB ARGB Even
|
||||
// This is an optimized version for scaling down an ARGB to even
|
||||
// multiple of its original size.
|
||||
static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBDownEven(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int col_step = dx >> 16;
|
||||
@ -154,21 +185,21 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
|
||||
ScaleARGBRowDownEven_Any_SSE2;
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
|
||||
: ScaleARGBRowDownEven_Any_SSE2;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
|
||||
ScaleARGBRowDownEven_SSE2;
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
|
||||
ScaleARGBRowDownEven_Any_NEON;
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
|
||||
: ScaleARGBRowDownEven_Any_NEON;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
|
||||
ScaleARGBRowDownEven_NEON;
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -184,25 +215,32 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
}
|
||||
|
||||
// Scale ARGB down with bilinear interpolation.
|
||||
static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBBilinearDown(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
|
||||
int64 xlast = x + (int64)(dst_width - 1) * dx;
|
||||
int64 xl = (dx >= 0) ? x : xlast;
|
||||
int64 xr = (dx >= 0) ? xlast : x;
|
||||
int clip_src_width;
|
||||
xl = (xl >> 16) & ~3; // Left edge aligned.
|
||||
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
|
||||
xl = (xl >> 16) & ~3; // Left edge aligned.
|
||||
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
|
||||
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
|
||||
if (xr > src_width) {
|
||||
xr = src_width;
|
||||
@ -235,8 +273,8 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
|
||||
IS_ALIGNED(src_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(clip_src_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
@ -286,18 +324,25 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
}
|
||||
|
||||
// Scale ARGB up with bilinear interpolation.
|
||||
static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBBilinearUp(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
@ -325,14 +370,14 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
|
||||
IS_ALIGNED(dst_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
ScaleARGBFilterCols =
|
||||
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
@ -423,8 +468,10 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
|
||||
#ifdef YUVSCALEUP
|
||||
// Scale YUV to ARGB up with bilinear interpolation.
|
||||
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride_y,
|
||||
int src_stride_u,
|
||||
int src_stride_v,
|
||||
@ -433,14 +480,15 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
|
||||
const uint8* v_buf, uint8* rgb_buf, int width) =
|
||||
I422ToARGBRow_C;
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
@ -483,9 +531,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
InterpolateRow = InterpolateRow_Any_SSSE3;
|
||||
@ -511,18 +559,18 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
|
||||
IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
ScaleARGBFilterCols =
|
||||
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
@ -643,14 +691,21 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
// of x and dx is the integer part of the source position and
|
||||
// the lower 16 bits are the fixed decimal part.
|
||||
|
||||
static void ScaleARGBSimple(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
static void ScaleARGBSimple(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width,
|
||||
int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
@ -675,8 +730,8 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
|
||||
dst_width, x, dx);
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
|
||||
dx);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@ -685,11 +740,18 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
||||
// Scale an ARGB image.
|
||||
// This function in turn calls a scaling function
|
||||
// suitable for handling the desired resolutions.
|
||||
static void ScaleARGB(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint8* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
static void ScaleARGB(const uint8* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||
int x = 0;
|
||||
@ -698,8 +760,7 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
int dy = 0;
|
||||
// ARGB does not support box filter yet, but allow the user to pass it.
|
||||
// Simplify filtering when possible.
|
||||
filtering = ScaleFilterReduce(src_width, src_height,
|
||||
dst_width, dst_height,
|
||||
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
|
||||
filtering);
|
||||
|
||||
// Negative src_height means invert the image.
|
||||
@ -708,17 +769,17 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
src = src + (src_height - 1) * src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
|
||||
&x, &y, &dx, &dy);
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
||||
&dx, &dy);
|
||||
src_width = Abs(src_width);
|
||||
if (clip_x) {
|
||||
int64 clipf = (int64)(clip_x) * dx;
|
||||
int64 clipf = (int64)(clip_x)*dx;
|
||||
x += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * 4;
|
||||
dst += clip_x * 4;
|
||||
}
|
||||
if (clip_y) {
|
||||
int64 clipf = (int64)(clip_y) * dy;
|
||||
int64 clipf = (int64)(clip_y)*dy;
|
||||
y += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * src_stride;
|
||||
dst += clip_y * dst_stride;
|
||||
@ -733,24 +794,20 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
if (!(dx & 0x10000) && !(dy & 0x10000)) {
|
||||
if (dx == 0x20000) {
|
||||
// Optimized 1/2 downsample.
|
||||
ScaleARGBDown2(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
if (dx == 0x40000 && filtering == kFilterBox) {
|
||||
// Optimized 1/4 box downsample.
|
||||
ScaleARGBDown4Box(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy);
|
||||
return;
|
||||
}
|
||||
ScaleARGBDownEven(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
// Optimized odd scale down, i.e. 3, 5, 7, 9x.
|
||||
@ -767,96 +824,103 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
}
|
||||
if (dx == 0x10000 && (x & 0xffff) == 0) {
|
||||
// Arbitrary scale vertically, but unscaled horizontally.
|
||||
ScalePlaneVertical(src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, y, dy, 4, filtering);
|
||||
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
|
||||
dst_stride, src, dst, x, y, dy, 4, filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering && dy < 65536) {
|
||||
ScaleARGBBilinearUp(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering) {
|
||||
ScaleARGBBilinearDown(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
|
||||
dst_stride, src, dst, x, dx, y, dy);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int ARGBScaleClip(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
|
||||
clip_x < 0 || clip_y < 0 ||
|
||||
if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
|
||||
dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
|
||||
clip_width > 32768 || clip_height > 32768 ||
|
||||
(clip_x + clip_width) > dst_width ||
|
||||
(clip_y + clip_height) > dst_height) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height, filtering);
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
|
||||
clip_height, filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale an ARGB image.
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int ARGBScale(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
src_width > 32768 || src_height > 32768 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
|
||||
src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
0, 0, dst_width, dst_height, filtering);
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
|
||||
filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int YUVToARGBScaleClip(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
|
||||
int r;
|
||||
I420ToARGB(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4,
|
||||
src_width, src_height);
|
||||
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4, src_width, src_height);
|
||||
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4,
|
||||
src_width, src_height,
|
||||
dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height,
|
||||
filtering);
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
|
||||
clip_width, clip_height, filtering);
|
||||
free(argb_buffer);
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -28,8 +28,10 @@ static __inline int Abs(int v) {
|
||||
}
|
||||
|
||||
// CPU agnostic row functions
|
||||
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = src_ptr[1];
|
||||
@ -42,8 +44,10 @@ void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown2_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = src_ptr[1];
|
||||
@ -56,8 +60,10 @@ void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Linear_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
@ -71,8 +77,10 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
const uint16* s = src_ptr;
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
@ -86,8 +94,10 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -103,8 +113,10 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -125,8 +137,10 @@ void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
dst[0] = (s[0] + t[0] + 1) >> 1;
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
const uint16* s = src_ptr;
|
||||
const uint16* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -142,8 +156,10 @@ void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown4_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = src_ptr[2];
|
||||
@ -156,8 +172,10 @@ void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown4_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = src_ptr[2];
|
||||
@ -170,80 +188,86 @@ void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown4Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride + 3] +
|
||||
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
|
||||
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
|
||||
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
|
||||
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
|
||||
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
|
||||
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
|
||||
src_ptr[stride * 3 + 3] + 8) >>
|
||||
4;
|
||||
dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
|
||||
src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
|
||||
src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
|
||||
src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
|
||||
src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
|
||||
src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
|
||||
src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
|
||||
src_ptr[stride * 3 + 7] + 8) >>
|
||||
4;
|
||||
dst += 2;
|
||||
src_ptr += 8;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride + 3] +
|
||||
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
|
||||
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
|
||||
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
|
||||
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
|
||||
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
|
||||
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
|
||||
src_ptr[stride * 3 + 3] + 8) >>
|
||||
4;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown4Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int x;
|
||||
for (x = 0; x < dst_width - 1; x += 2) {
|
||||
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride + 3] +
|
||||
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
|
||||
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
|
||||
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
|
||||
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
|
||||
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
|
||||
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
|
||||
src_ptr[stride * 3 + 3] + 8) >>
|
||||
4;
|
||||
dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
|
||||
src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
|
||||
src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
|
||||
src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
|
||||
src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
|
||||
src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
|
||||
src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
|
||||
src_ptr[stride * 3 + 7] + 8) >>
|
||||
4;
|
||||
dst += 2;
|
||||
src_ptr += 8;
|
||||
}
|
||||
if (dst_width & 1) {
|
||||
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride + 3] +
|
||||
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
|
||||
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
|
||||
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
|
||||
8) >> 4;
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
|
||||
src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
|
||||
src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
|
||||
src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
|
||||
src_ptr[stride * 3 + 3] + 8) >>
|
||||
4;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown34_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (x = 0; x < dst_width; x += 3) {
|
||||
@ -255,8 +279,10 @@ void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown34_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (x = 0; x < dst_width; x += 3) {
|
||||
@ -269,8 +295,10 @@ void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Filter rows 0 and 1 together, 3 : 1
|
||||
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -291,8 +319,10 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* d, int dst_width) {
|
||||
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* d,
|
||||
int dst_width) {
|
||||
const uint16* s = src_ptr;
|
||||
const uint16* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -314,8 +344,10 @@ void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Filter rows 1 and 2 together, 1 : 1
|
||||
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width) {
|
||||
const uint8* s = src_ptr;
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -336,8 +368,10 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* d, int dst_width) {
|
||||
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* d,
|
||||
int dst_width) {
|
||||
const uint16* s = src_ptr;
|
||||
const uint16* t = src_ptr + src_stride;
|
||||
int x;
|
||||
@ -359,8 +393,11 @@ void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Scales a single row of pixels using point sampling.
|
||||
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleCols_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
dst_ptr[0] = src_ptr[x >> 16];
|
||||
@ -374,8 +411,11 @@ void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleCols_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
dst_ptr[0] = src_ptr[x >> 16];
|
||||
@ -390,8 +430,11 @@ void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
}
|
||||
|
||||
// Scales a single row of pixels up by 2x using point sampling.
|
||||
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleColsUp2_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
dst_ptr[1] = dst_ptr[0] = src_ptr[0];
|
||||
@ -403,8 +446,11 @@ void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleColsUp2_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
dst_ptr[1] = dst_ptr[0] = src_ptr[0];
|
||||
@ -418,16 +464,19 @@ void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
|
||||
// (1-f)a + fb can be replaced with a + f(b-a)
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
#define BLENDER(a, b, f) \
|
||||
(uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
#else
|
||||
// Intel uses 7 bit math with rounding.
|
||||
#define BLENDER(a, b, f) (uint8)((int)(a) + \
|
||||
(((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
|
||||
#define BLENDER(a, b, f) \
|
||||
(uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
|
||||
#endif
|
||||
|
||||
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleFilterCols_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
int xi = x >> 16;
|
||||
@ -450,8 +499,11 @@ void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x32, int dx) {
|
||||
void ScaleFilterCols64_C(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x32,
|
||||
int dx) {
|
||||
int64 x = (int64)(x32);
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
@ -477,11 +529,14 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
|
||||
#undef BLENDER
|
||||
|
||||
// Same as 8 bit arm blender but return is cast to uint16
|
||||
#define BLENDER(a, b, f) (uint16)((int)(a) + \
|
||||
((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
#define BLENDER(a, b, f) \
|
||||
(uint16)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
|
||||
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleFilterCols_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
int xi = x >> 16;
|
||||
@ -504,8 +559,11 @@ void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
int dst_width, int x32, int dx) {
|
||||
void ScaleFilterCols64_16_C(uint16* dst_ptr,
|
||||
const uint16* src_ptr,
|
||||
int dst_width,
|
||||
int x32,
|
||||
int dx) {
|
||||
int64 x = (int64)(x32);
|
||||
int j;
|
||||
for (j = 0; j < dst_width - 1; j += 2) {
|
||||
@ -530,8 +588,10 @@ void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
|
||||
}
|
||||
#undef BLENDER
|
||||
|
||||
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown38_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
assert(dst_width % 3 == 0);
|
||||
for (x = 0; x < dst_width; x += 3) {
|
||||
@ -543,8 +603,10 @@ void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst, int dst_width) {
|
||||
void ScaleRowDown38_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst,
|
||||
int dst_width) {
|
||||
int x;
|
||||
assert(dst_width % 3 == 0);
|
||||
for (x = 0; x < dst_width; x += 3) {
|
||||
@ -559,25 +621,29 @@ void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
// 8x3 -> 3x1
|
||||
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int i;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (i = 0; i < dst_width; i += 3) {
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
|
||||
(65536 / 9) >> 16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
|
||||
src_ptr[stride + 3] + src_ptr[stride + 4] +
|
||||
src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
|
||||
(65536 / 9) >> 16;
|
||||
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
|
||||
(65536 / 6) >> 16;
|
||||
dst_ptr[0] =
|
||||
(src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
|
||||
src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
|
||||
(65536 / 9) >>
|
||||
16;
|
||||
dst_ptr[1] =
|
||||
(src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
|
||||
(65536 / 9) >>
|
||||
16;
|
||||
dst_ptr[2] =
|
||||
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
src_ptr += 8;
|
||||
dst_ptr += 3;
|
||||
}
|
||||
@ -585,66 +651,80 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int dst_width) {
|
||||
uint16* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int i;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (i = 0; i < dst_width; i += 3) {
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
|
||||
(65536 / 9) >> 16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
|
||||
src_ptr[stride + 3] + src_ptr[stride + 4] +
|
||||
src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
|
||||
(65536 / 9) >> 16;
|
||||
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
|
||||
(65536 / 6) >> 16;
|
||||
dst_ptr[0] =
|
||||
(src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
|
||||
src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
|
||||
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
|
||||
(65536 / 9) >>
|
||||
16;
|
||||
dst_ptr[1] =
|
||||
(src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
|
||||
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
|
||||
(65536 / 9) >>
|
||||
16;
|
||||
dst_ptr[2] =
|
||||
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
|
||||
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
src_ptr += 8;
|
||||
dst_ptr += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// 8x2 -> 3x1
|
||||
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int i;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (i = 0; i < dst_width; i += 3) {
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2]) * (65536 / 6) >> 16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
|
||||
src_ptr[stride + 3] + src_ptr[stride + 4] +
|
||||
src_ptr[stride + 5]) * (65536 / 6) >> 16;
|
||||
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7]) *
|
||||
(65536 / 4) >> 16;
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
|
||||
src_ptr[stride + 1] + src_ptr[stride + 2]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
dst_ptr[2] =
|
||||
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
|
||||
(65536 / 4) >>
|
||||
16;
|
||||
src_ptr += 8;
|
||||
dst_ptr += 3;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
int i;
|
||||
assert((dst_width % 3 == 0) && (dst_width > 0));
|
||||
for (i = 0; i < dst_width; i += 3) {
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
|
||||
src_ptr[stride + 0] + src_ptr[stride + 1] +
|
||||
src_ptr[stride + 2]) * (65536 / 6) >> 16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
|
||||
src_ptr[stride + 3] + src_ptr[stride + 4] +
|
||||
src_ptr[stride + 5]) * (65536 / 6) >> 16;
|
||||
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
|
||||
src_ptr[stride + 6] + src_ptr[stride + 7]) *
|
||||
(65536 / 4) >> 16;
|
||||
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
|
||||
src_ptr[stride + 1] + src_ptr[stride + 2]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
|
||||
src_ptr[stride + 4] + src_ptr[stride + 5]) *
|
||||
(65536 / 6) >>
|
||||
16;
|
||||
dst_ptr[2] =
|
||||
(src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
|
||||
(65536 / 4) >>
|
||||
16;
|
||||
src_ptr += 8;
|
||||
dst_ptr += 3;
|
||||
}
|
||||
@ -680,7 +760,8 @@ void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
|
||||
|
||||
void ScaleARGBRowDown2_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
|
||||
@ -698,7 +779,8 @@ void ScaleARGBRowDown2_C(const uint8* src_argb,
|
||||
|
||||
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width; ++x) {
|
||||
dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
|
||||
@ -710,26 +792,34 @@ void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDown2Box_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width; ++x) {
|
||||
dst_argb[0] = (src_argb[0] + src_argb[4] +
|
||||
src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
|
||||
dst_argb[1] = (src_argb[1] + src_argb[5] +
|
||||
src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
|
||||
dst_argb[2] = (src_argb[2] + src_argb[6] +
|
||||
src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
|
||||
dst_argb[3] = (src_argb[3] + src_argb[7] +
|
||||
src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
|
||||
dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
|
||||
src_argb[src_stride + 4] + 2) >>
|
||||
2;
|
||||
dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
|
||||
src_argb[src_stride + 5] + 2) >>
|
||||
2;
|
||||
dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
|
||||
src_argb[src_stride + 6] + 2) >>
|
||||
2;
|
||||
dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
|
||||
src_argb[src_stride + 7] + 2) >>
|
||||
2;
|
||||
src_argb += 8;
|
||||
dst_argb += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
|
||||
@ -748,25 +838,33 @@ void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
int x;
|
||||
for (x = 0; x < dst_width; ++x) {
|
||||
dst_argb[0] = (src_argb[0] + src_argb[4] +
|
||||
src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
|
||||
dst_argb[1] = (src_argb[1] + src_argb[5] +
|
||||
src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
|
||||
dst_argb[2] = (src_argb[2] + src_argb[6] +
|
||||
src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
|
||||
dst_argb[3] = (src_argb[3] + src_argb[7] +
|
||||
src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
|
||||
dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
|
||||
src_argb[src_stride + 4] + 2) >>
|
||||
2;
|
||||
dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
|
||||
src_argb[src_stride + 5] + 2) >>
|
||||
2;
|
||||
dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
|
||||
src_argb[src_stride + 6] + 2) >>
|
||||
2;
|
||||
dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
|
||||
src_argb[src_stride + 7] + 2) >>
|
||||
2;
|
||||
src_argb += src_stepx * 4;
|
||||
dst_argb += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Scales a single row of pixels using point sampling.
|
||||
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBCols_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
int j;
|
||||
@ -782,8 +880,11 @@ void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x32, int dx) {
|
||||
void ScaleARGBCols64_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x32,
|
||||
int dx) {
|
||||
int64 x = (int64)(x32);
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
@ -801,8 +902,11 @@ void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
}
|
||||
|
||||
// Scales a single row of pixels up by 2x using point sampling.
|
||||
void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBColsUp2_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
int j;
|
||||
@ -818,15 +922,18 @@ void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
|
||||
// Mimics SSSE3 blender
|
||||
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
|
||||
#define BLENDERC(a, b, f, s) (uint32)( \
|
||||
BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
|
||||
#define BLENDER(a, b, f) \
|
||||
BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
|
||||
BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
|
||||
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
|
||||
#define BLENDERC(a, b, f, s) \
|
||||
(uint32)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
|
||||
#define BLENDER(a, b, f) \
|
||||
BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
|
||||
BLENDERC(a, b, f, 0)
|
||||
|
||||
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBFilterCols_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
int j;
|
||||
@ -854,8 +961,11 @@ void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
|
||||
}
|
||||
}
|
||||
|
||||
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x32, int dx) {
|
||||
void ScaleARGBFilterCols64_C(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x32,
|
||||
int dx) {
|
||||
int64 x = (int64)(x32);
|
||||
const uint32* src = (const uint32*)(src_argb);
|
||||
uint32* dst = (uint32*)(dst_argb);
|
||||
@ -889,16 +999,22 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
// Scale plane vertically with bilinear interpolation.
|
||||
void ScalePlaneVertical(int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int y, int dy,
|
||||
int bpp, enum FilterMode filtering) {
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8* src_argb,
|
||||
uint8* dst_argb,
|
||||
int x,
|
||||
int y,
|
||||
int dy,
|
||||
int bpp,
|
||||
enum FilterMode filtering) {
|
||||
// TODO(fbarchard): Allow higher bpp.
|
||||
int dst_width_bytes = dst_width * bpp;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
||||
int j;
|
||||
assert(bpp >= 1 && bpp <= 4);
|
||||
@ -931,9 +1047,9 @@ void ScalePlaneVertical(int src_height,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
|
||||
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
|
||||
IS_ALIGNED(dst_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(dst_width_bytes, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
@ -948,23 +1064,29 @@ void ScalePlaneVertical(int src_height,
|
||||
}
|
||||
yi = y >> 16;
|
||||
yf = filtering ? ((y >> 8) & 255) : 0;
|
||||
InterpolateRow(dst_argb, src_argb + yi * src_stride,
|
||||
src_stride, dst_width_bytes, yf);
|
||||
InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
|
||||
dst_width_bytes, yf);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
void ScalePlaneVertical_16(int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint16* src_argb, uint16* dst_argb,
|
||||
int x, int y, int dy,
|
||||
int wpp, enum FilterMode filtering) {
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint16* src_argb,
|
||||
uint16* dst_argb,
|
||||
int x,
|
||||
int y,
|
||||
int dy,
|
||||
int wpp,
|
||||
enum FilterMode filtering) {
|
||||
// TODO(fbarchard): Allow higher wpp.
|
||||
int dst_width_words = dst_width * wpp;
|
||||
void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_16_C;
|
||||
void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_16_C;
|
||||
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
|
||||
int j;
|
||||
assert(wpp >= 1 && wpp <= 2);
|
||||
@ -1005,9 +1127,9 @@ void ScalePlaneVertical_16(int src_height,
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
|
||||
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
|
||||
IS_ALIGNED(dst_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_Any_16_DSPR2;
|
||||
if (IS_ALIGNED(dst_width_bytes, 4)) {
|
||||
InterpolateRow = InterpolateRow_16_DSPR2;
|
||||
@ -1022,16 +1144,18 @@ void ScalePlaneVertical_16(int src_height,
|
||||
}
|
||||
yi = y >> 16;
|
||||
yf = filtering ? ((y >> 8) & 255) : 0;
|
||||
InterpolateRow(dst_argb, src_argb + yi * src_stride,
|
||||
src_stride, dst_width_words, yf);
|
||||
InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
|
||||
dst_width_words, yf);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
|
||||
// Simplify the filtering based on scale factors.
|
||||
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
enum FilterMode ScaleFilterReduce(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering) {
|
||||
if (src_width < 0) {
|
||||
src_width = -src_width;
|
||||
@ -1078,17 +1202,21 @@ int FixedDiv_C(int num, int div) {
|
||||
|
||||
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
|
||||
int FixedDiv1_C(int num, int div) {
|
||||
return (int)((((int64)(num) << 16) - 0x00010001) /
|
||||
(div - 1));
|
||||
return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
|
||||
}
|
||||
|
||||
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
|
||||
|
||||
// Compute slope values for stepping.
|
||||
void ScaleSlope(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
void ScaleSlope(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering,
|
||||
int* x, int* y, int* dx, int* dy) {
|
||||
int* x,
|
||||
int* y,
|
||||
int* dx,
|
||||
int* dy) {
|
||||
assert(x != NULL);
|
||||
assert(y != NULL);
|
||||
assert(dx != NULL);
|
||||
@ -1120,7 +1248,7 @@ void ScaleSlope(int src_width, int src_height,
|
||||
*x = 0;
|
||||
}
|
||||
if (dst_height <= src_height) {
|
||||
*dy = FixedDiv(src_height, dst_height);
|
||||
*dy = FixedDiv(src_height, dst_height);
|
||||
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
|
||||
} else if (dst_height > 1) {
|
||||
*dy = FixedDiv1(src_height, dst_height);
|
||||
|
||||
@ -21,85 +21,81 @@ extern "C" {
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
|
||||
// Offsets for source bytes 0 to 9
|
||||
static uvec8 kShuf0 =
|
||||
{ 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
|
||||
128, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
|
||||
static uvec8 kShuf1 =
|
||||
{ 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShuf1 = {3, 4, 5, 7, 8, 9, 11, 12,
|
||||
128, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 15.
|
||||
static uvec8 kShuf2 =
|
||||
{ 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15,
|
||||
128, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
// Offsets for source bytes 0 to 10
|
||||
static uvec8 kShuf01 =
|
||||
{ 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
|
||||
static uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};
|
||||
|
||||
// Offsets for source bytes 10 to 21 with 8 subtracted = 2 to 13.
|
||||
static uvec8 kShuf11 =
|
||||
{ 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
|
||||
static uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13};
|
||||
|
||||
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 15.
|
||||
static uvec8 kShuf21 =
|
||||
{ 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
|
||||
static uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10,
|
||||
10, 11, 12, 13, 13, 14, 14, 15};
|
||||
|
||||
// Coefficients for source bytes 0 to 10
|
||||
static uvec8 kMadd01 =
|
||||
{ 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
|
||||
static uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2};
|
||||
|
||||
// Coefficients for source bytes 10 to 21
|
||||
static uvec8 kMadd11 =
|
||||
{ 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
|
||||
static uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1};
|
||||
|
||||
// Coefficients for source bytes 21 to 31
|
||||
static uvec8 kMadd21 =
|
||||
{ 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
|
||||
static uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3};
|
||||
|
||||
// Rounding constant (2 per 16-bit lane) loaded as kRound34 by the 3/4 box filters.
|
||||
static vec16 kRound34 =
|
||||
{ 2, 2, 2, 2, 2, 2, 2, 2 };
|
||||
static vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2};
|
||||
|
||||
static uvec8 kShuf38a =
|
||||
{ 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
static uvec8 kShuf38b =
|
||||
{ 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
|
||||
static uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3,
|
||||
6, 8, 11, 14, 128, 128, 128, 128};
|
||||
|
||||
// Arrange words 0,3,6 into 0,1,2
|
||||
static uvec8 kShufAc =
|
||||
{ 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128,
|
||||
128, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
// Arrange words 0,3,6 into 3,4,5
|
||||
static uvec8 kShufAc3 =
|
||||
{ 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
|
||||
static uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1,
|
||||
6, 7, 12, 13, 128, 128, 128, 128};
|
||||
|
||||
// Scaling values for boxes of 3x3 and 2x3
|
||||
static uvec16 kScaleAc33 =
|
||||
{ 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
|
||||
static uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9,
|
||||
65536 / 9, 65536 / 6, 0, 0};
|
||||
|
||||
// Arrange first value for pixels 0,1,2,3,4,5
|
||||
static uvec8 kShufAb0 =
|
||||
{ 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128,
|
||||
11, 128, 14, 128, 128, 128, 128, 128};
|
||||
|
||||
// Arrange second value for pixels 0,1,2,3,4,5
|
||||
static uvec8 kShufAb1 =
|
||||
{ 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128,
|
||||
12, 128, 15, 128, 128, 128, 128, 128};
|
||||
|
||||
// Arrange third value for pixels 0,1,2,3,4,5
|
||||
static uvec8 kShufAb2 =
|
||||
{ 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
|
||||
static uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128,
|
||||
13, 128, 128, 128, 128, 128, 128, 128};
|
||||
|
||||
// Scaling values for boxes of 3x2 and 2x2
|
||||
static uvec16 kScaleAb2 =
|
||||
{ 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
|
||||
static uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3,
|
||||
65536 / 3, 65536 / 2, 0, 0};
|
||||
|
||||
// GCC versions of row functions are verbatim conversions from Visual C.
|
||||
// Generated using gcc disassembly on Visual C object file:
|
||||
// objdump -D yuvscaler.obj >yuvscaler.txt
|
||||
|
||||
void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -120,8 +116,10 @@ void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
"psrlw $0xf,%%xmm4 \n"
|
||||
@ -149,8 +147,10 @@ void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
"psrlw $0xf,%%xmm4 \n"
|
||||
@ -189,8 +189,10 @@ void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEROWDOWN2_AVX2
|
||||
void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -213,8 +215,10 @@ void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2Linear_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
|
||||
@ -244,8 +248,10 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown2Box_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
|
||||
@ -286,8 +292,10 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
#endif // HAS_SCALEROWDOWN2_AVX2
|
||||
|
||||
void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrld $0x18,%%xmm5 \n"
|
||||
@ -314,8 +322,10 @@ void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stridex3;
|
||||
asm volatile (
|
||||
"pcmpeqb %%xmm4,%%xmm4 \n"
|
||||
@ -368,10 +378,11 @@ void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAS_SCALEROWDOWN4_AVX2
|
||||
void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
"vpsrld $0x18,%%ymm5,%%ymm5 \n"
|
||||
@ -400,8 +411,10 @@ void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4Box_AVX2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
|
||||
"vpsrlw $0xf,%%ymm4,%%ymm4 \n"
|
||||
@ -455,17 +468,19 @@ void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
#endif // HAS_SCALEROWDOWN4_AVX2
|
||||
|
||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm3 \n"
|
||||
"movdqa %1,%%xmm4 \n"
|
||||
"movdqa %2,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShuf0), // %0
|
||||
"m"(kShuf1), // %1
|
||||
"m"(kShuf2) // %2
|
||||
);
|
||||
void ScaleRowDown34_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm3 \n"
|
||||
"movdqa %1,%%xmm4 \n"
|
||||
"movdqa %2,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShuf0), // %0
|
||||
"m"(kShuf1), // %1
|
||||
"m"(kShuf2) // %2
|
||||
);
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -492,25 +507,26 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm2 \n" // kShuf01
|
||||
"movdqa %1,%%xmm3 \n" // kShuf11
|
||||
"movdqa %2,%%xmm4 \n" // kShuf21
|
||||
:
|
||||
: "m"(kShuf01), // %0
|
||||
"m"(kShuf11), // %1
|
||||
"m"(kShuf21) // %2
|
||||
);
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm5 \n" // kMadd01
|
||||
"movdqa %1,%%xmm0 \n" // kMadd11
|
||||
"movdqa %2,%%xmm1 \n" // kRound34
|
||||
:
|
||||
: "m"(kMadd01), // %0
|
||||
"m"(kMadd11), // %1
|
||||
"m"(kRound34) // %2
|
||||
);
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm2 \n" // kShuf01
|
||||
"movdqa %1,%%xmm3 \n" // kShuf11
|
||||
"movdqa %2,%%xmm4 \n" // kShuf21
|
||||
:
|
||||
: "m"(kShuf01), // %0
|
||||
"m"(kShuf11), // %1
|
||||
"m"(kShuf21) // %2
|
||||
);
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm5 \n" // kMadd01
|
||||
"movdqa %1,%%xmm0 \n" // kMadd11
|
||||
"movdqa %2,%%xmm1 \n" // kRound34
|
||||
:
|
||||
: "m"(kMadd01), // %0
|
||||
"m"(kMadd11), // %1
|
||||
"m"(kRound34) // %2
|
||||
);
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -557,25 +573,26 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm2 \n" // kShuf01
|
||||
"movdqa %1,%%xmm3 \n" // kShuf11
|
||||
"movdqa %2,%%xmm4 \n" // kShuf21
|
||||
:
|
||||
: "m"(kShuf01), // %0
|
||||
"m"(kShuf11), // %1
|
||||
"m"(kShuf21) // %2
|
||||
);
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm5 \n" // kMadd01
|
||||
"movdqa %1,%%xmm0 \n" // kMadd11
|
||||
"movdqa %2,%%xmm1 \n" // kRound34
|
||||
:
|
||||
: "m"(kMadd01), // %0
|
||||
"m"(kMadd11), // %1
|
||||
"m"(kRound34) // %2
|
||||
);
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm2 \n" // kShuf01
|
||||
"movdqa %1,%%xmm3 \n" // kShuf11
|
||||
"movdqa %2,%%xmm4 \n" // kShuf21
|
||||
:
|
||||
: "m"(kShuf01), // %0
|
||||
"m"(kShuf11), // %1
|
||||
"m"(kShuf21) // %2
|
||||
);
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm5 \n" // kMadd01
|
||||
"movdqa %1,%%xmm0 \n" // kMadd11
|
||||
"movdqa %2,%%xmm1 \n" // kRound34
|
||||
:
|
||||
: "m"(kMadd01), // %0
|
||||
"m"(kMadd11), // %1
|
||||
"m"(kRound34) // %2
|
||||
);
|
||||
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
@ -624,8 +641,10 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown38_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
@ -655,18 +674,19 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm2 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm4 \n"
|
||||
"movdqa %3,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShufAb0), // %0
|
||||
"m"(kShufAb1), // %1
|
||||
"m"(kShufAb2), // %2
|
||||
"m"(kScaleAb2) // %3
|
||||
);
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm2 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm4 \n"
|
||||
"movdqa %3,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShufAb0), // %0
|
||||
"m"(kShufAb1), // %1
|
||||
"m"(kShufAb2), // %2
|
||||
"m"(kScaleAb2) // %3
|
||||
);
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -700,17 +720,18 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm2 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm4 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShufAc), // %0
|
||||
"m"(kShufAc3), // %1
|
||||
"m"(kScaleAc33) // %2
|
||||
);
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm2 \n"
|
||||
"movdqa %1,%%xmm3 \n"
|
||||
"movdqa %2,%%xmm4 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShufAc), // %0
|
||||
"m"(kShufAc3), // %1
|
||||
"m"(kScaleAc33) // %2
|
||||
);
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -790,7 +811,6 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAS_SCALEADDROW_AVX2
|
||||
// Reads 32 bytes and accumulates to 32 shorts at a time.
|
||||
void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
@ -823,17 +843,19 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
|
||||
|
||||
// Constant for making pixels signed to avoid pmaddubsw
|
||||
// saturation.
|
||||
static uvec8 kFsub80 =
|
||||
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
|
||||
static uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
|
||||
|
||||
// Constant for making pixels unsigned and adding .5 for rounding.
|
||||
static uvec16 kFadd40 =
|
||||
{ 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
|
||||
static uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040,
|
||||
0x4040, 0x4040, 0x4040, 0x4040};
|
||||
|
||||
// Bilinear column filtering. SSSE3 version.
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleFilterCols_SSSE3(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
intptr_t x0, x1, temp_pixel;
|
||||
asm volatile (
|
||||
"movd %6,%%xmm2 \n"
|
||||
@ -925,8 +947,11 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
|
||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||
// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.
|
||||
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleColsUp2_SSE2(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -950,7 +975,8 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
|
||||
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -971,7 +997,8 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
|
||||
|
||||
void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -995,7 +1022,8 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
|
||||
|
||||
void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -1025,8 +1053,11 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
|
||||
|
||||
// Reads 4 pixels at a time.
|
||||
// Alignment requirement: dst_argb 16 byte aligned.
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx, uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
|
||||
intptr_t src_stepx_x12;
|
||||
asm volatile (
|
||||
@ -1059,8 +1090,10 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
// Blends four 2x2 to 4x1.
|
||||
// Alignment requirement: dst_argb 16 byte aligned.
|
||||
void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
|
||||
intptr_t src_stepx_x12;
|
||||
intptr_t row1 = (intptr_t)(src_stride);
|
||||
@ -1102,8 +1135,11 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBCols_SSE2(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
intptr_t x0, x1;
|
||||
asm volatile (
|
||||
"movd %5,%%xmm2 \n"
|
||||
@ -1171,8 +1207,11 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||
// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
|
||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBColsUp2_SSE2(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -1197,26 +1236,29 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
|
||||
static uvec8 kShuffleColARGB = {
|
||||
0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
|
||||
8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
|
||||
0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel
|
||||
8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel
|
||||
};
|
||||
|
||||
// Shuffle table for duplicating 2 fractions into 8 bytes each
|
||||
static uvec8 kShuffleFractions = {
|
||||
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
|
||||
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
|
||||
};
|
||||
|
||||
// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
|
||||
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBFilterCols_SSSE3(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
intptr_t x0, x1;
|
||||
asm volatile (
|
||||
"movdqa %0,%%xmm4 \n"
|
||||
"movdqa %1,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShuffleColARGB), // %0
|
||||
"m"(kShuffleFractions) // %1
|
||||
);
|
||||
asm volatile(
|
||||
"movdqa %0,%%xmm4 \n"
|
||||
"movdqa %1,%%xmm5 \n"
|
||||
:
|
||||
: "m"(kShuffleColARGB), // %0
|
||||
"m"(kShuffleFractions) // %1
|
||||
);
|
||||
|
||||
asm volatile (
|
||||
"movd %5,%%xmm2 \n"
|
||||
@ -1283,34 +1325,32 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
// Divide num by div and return as 16.16 fixed point result.
|
||||
int FixedDiv_X86(int num, int div) {
|
||||
asm volatile (
|
||||
"cdq \n"
|
||||
"shld $0x10,%%eax,%%edx \n"
|
||||
"shl $0x10,%%eax \n"
|
||||
"idiv %1 \n"
|
||||
"mov %0, %%eax \n"
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx"
|
||||
);
|
||||
asm volatile(
|
||||
"cdq \n"
|
||||
"shld $0x10,%%eax,%%edx \n"
|
||||
"shl $0x10,%%eax \n"
|
||||
"idiv %1 \n"
|
||||
"mov %0, %%eax \n"
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx");
|
||||
return num;
|
||||
}
|
||||
|
||||
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
|
||||
int FixedDiv1_X86(int num, int div) {
|
||||
asm volatile (
|
||||
"cdq \n"
|
||||
"shld $0x10,%%eax,%%edx \n"
|
||||
"shl $0x10,%%eax \n"
|
||||
"sub $0x10001,%%eax \n"
|
||||
"sbb $0x0,%%edx \n"
|
||||
"sub $0x1,%1 \n"
|
||||
"idiv %1 \n"
|
||||
"mov %0, %%eax \n"
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx"
|
||||
);
|
||||
asm volatile(
|
||||
"cdq \n"
|
||||
"shld $0x10,%%eax,%%edx \n"
|
||||
"shl $0x10,%%eax \n"
|
||||
"sub $0x10001,%%eax \n"
|
||||
"sbb $0x0,%%edx \n"
|
||||
"sub $0x1,%1 \n"
|
||||
"idiv %1 \n"
|
||||
"mov %0, %%eax \n"
|
||||
: "+a"(num) // %0
|
||||
: "c"(div) // %1
|
||||
: "memory", "cc", "edx");
|
||||
return num;
|
||||
}
|
||||
|
||||
|
||||
@ -17,168 +17,167 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for GCC MIPS DSPR2
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
|
||||
(__mips_dsp_rev >= 2) && (_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
|
||||
"beqz $t9, 2f \n"
|
||||
" nop \n"
|
||||
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
|
||||
"beqz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// TODO(fbarchard): Use odd pixels instead of even.
|
||||
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t8, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $t1, 8(%[dst]) \n"
|
||||
"sw $t2, 12(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 16 \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// TODO(fbarchard): Use odd pixels instead of even.
|
||||
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t8, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $t1, 8(%[dst]) \n"
|
||||
"sw $t2, 12(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 16 \n"
|
||||
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0xf \n" // residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0xf \n" // residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lbu $t0, 0(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 2 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 1 \n"
|
||||
"21: \n"
|
||||
"lbu $t0, 0(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 2 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 1 \n"
|
||||
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
|
||||
: [dst_width] "r"(dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"srl $t9, %[dst_width], 3 \n" // iterations -> step 8
|
||||
"bltz $t9, 2f \n"
|
||||
" nop \n"
|
||||
"srl $t9, %[dst_width], 3 \n" // iterations -> step 8
|
||||
"bltz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 0(%[t]) \n" // |19|18|17|16|
|
||||
"lw $t5, 4(%[t]) \n" // |23|22|21|20|
|
||||
"lw $t6, 8(%[t]) \n" // |27|26|25|24|
|
||||
"lw $t7, 12(%[t]) \n" // |31|30|29|28|
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"srl $t8, $t0, 16 \n" // |X|X|3|2|
|
||||
"ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
|
||||
"ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
|
||||
"raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
|
||||
"raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
|
||||
"shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
|
||||
"shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
|
||||
"srl $t8, $t1, 16 \n" // |X|X|7|6|
|
||||
"ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
|
||||
"ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
|
||||
"raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
|
||||
"raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
|
||||
"shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
|
||||
"shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t2, 16 \n" // |X|X|11|10|
|
||||
"ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
|
||||
"ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
|
||||
"raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
|
||||
"raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
|
||||
"shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
|
||||
"shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t3, 16 \n" // |X|X|15|14|
|
||||
"ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
|
||||
"ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
|
||||
"raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
|
||||
"raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
|
||||
"shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
|
||||
"shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
|
||||
"addiu %[src_ptr], %[src_ptr], 16 \n"
|
||||
"addiu %[t], %[t], 16 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"sb $t4, 1(%[dst]) \n"
|
||||
"sb $t1, 2(%[dst]) \n"
|
||||
"sb $t5, 3(%[dst]) \n"
|
||||
"sb $t2, 4(%[dst]) \n"
|
||||
"sb $t6, 5(%[dst]) \n"
|
||||
"sb $t3, 6(%[dst]) \n"
|
||||
"sb $t7, 7(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 8 \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 0(%[t]) \n" // |19|18|17|16|
|
||||
"lw $t5, 4(%[t]) \n" // |23|22|21|20|
|
||||
"lw $t6, 8(%[t]) \n" // |27|26|25|24|
|
||||
"lw $t7, 12(%[t]) \n" // |31|30|29|28|
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"srl $t8, $t0, 16 \n" // |X|X|3|2|
|
||||
"ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
|
||||
"ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
|
||||
"raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
|
||||
"raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
|
||||
"shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
|
||||
"shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
|
||||
"srl $t8, $t1, 16 \n" // |X|X|7|6|
|
||||
"ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
|
||||
"ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
|
||||
"raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
|
||||
"raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
|
||||
"shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
|
||||
"shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t2, 16 \n" // |X|X|11|10|
|
||||
"ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
|
||||
"ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
|
||||
"raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
|
||||
"raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
|
||||
"shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
|
||||
"shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t3, 16 \n" // |X|X|15|14|
|
||||
"ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
|
||||
"ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
|
||||
"raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
|
||||
"raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
|
||||
"shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
|
||||
"shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
|
||||
"addiu %[src_ptr], %[src_ptr], 16 \n"
|
||||
"addiu %[t], %[t], 16 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"sb $t4, 1(%[dst]) \n"
|
||||
"sb $t1, 2(%[dst]) \n"
|
||||
"sb $t5, 3(%[dst]) \n"
|
||||
"sb $t2, 4(%[dst]) \n"
|
||||
"sb $t6, 5(%[dst]) \n"
|
||||
"sb $t3, 6(%[dst]) \n"
|
||||
"sb $t7, 7(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 8 \n"
|
||||
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0x7 \n" // x = residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0x7 \n" // x = residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lwr $t1, 0(%[src_ptr]) \n"
|
||||
"lwl $t1, 3(%[src_ptr]) \n"
|
||||
"lwr $t2, 0(%[t]) \n"
|
||||
"lwl $t2, 3(%[t]) \n"
|
||||
"srl $t8, $t1, 16 \n"
|
||||
"ins $t1, $t2, 16, 16 \n"
|
||||
"ins $t2, $t8, 0, 16 \n"
|
||||
"raddu.w.qb $t1, $t1 \n"
|
||||
"raddu.w.qb $t2, $t2 \n"
|
||||
"shra_r.w $t1, $t1, 2 \n"
|
||||
"shra_r.w $t2, $t2, 2 \n"
|
||||
"sb $t1, 0(%[dst]) \n"
|
||||
"sb $t2, 1(%[dst]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -2 \n"
|
||||
"addiu %[t], %[t], 4 \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 2 \n"
|
||||
"21: \n"
|
||||
"lwr $t1, 0(%[src_ptr]) \n"
|
||||
"lwl $t1, 3(%[src_ptr]) \n"
|
||||
"lwr $t2, 0(%[t]) \n"
|
||||
"lwl $t2, 3(%[t]) \n"
|
||||
"srl $t8, $t1, 16 \n"
|
||||
"ins $t1, $t2, 16, 16 \n"
|
||||
"ins $t2, $t8, 0, 16 \n"
|
||||
"raddu.w.qb $t1, $t1 \n"
|
||||
"raddu.w.qb $t2, $t2 \n"
|
||||
"shra_r.w $t1, $t1, 2 \n"
|
||||
"shra_r.w $t2, $t2, 2 \n"
|
||||
"sb $t1, 0(%[dst]) \n"
|
||||
"sb $t2, 1(%[dst]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -2 \n"
|
||||
"addiu %[t], %[t], 4 \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 2 \n"
|
||||
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst), [t] "+r" (t)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [t] "+r"(t)
|
||||
: [dst_width] "r"(dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
|
||||
}
|
||||
|
||||
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
void ScaleRowDown4_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
@ -186,7 +185,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"beqz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
@ -208,12 +207,12 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 8 \n"
|
||||
|
||||
"2: \n"
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 7 \n" // residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"21: \n"
|
||||
"lbu $t1, 0(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
@ -221,31 +220,30 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 1 \n"
|
||||
|
||||
"3: \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
|
||||
: [dst_width] "r"(dst_width)
|
||||
: "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* s1 = src_ptr + stride;
|
||||
const uint8* s2 = s1 + stride;
|
||||
const uint8* s3 = s2 + stride;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"srl $t9, %[dst_width], 1 \n"
|
||||
"andi $t8, %[dst_width], 1 \n"
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
|
||||
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
|
||||
@ -299,23 +297,20 @@ void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[s1] "+r" (s1),
|
||||
[s2] "+r" (s2),
|
||||
[s3] "+r" (s3)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6","t7", "t8", "t9"
|
||||
);
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [s1] "+r"(s1), [s2] "+r"(s2),
|
||||
[s3] "+r"(s3)
|
||||
: [dst_width] "r"(dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
|
||||
}
|
||||
|
||||
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
void ScaleRowDown34_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
@ -347,23 +342,21 @@ void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"bnez %[dst_width], 1b \n"
|
||||
" addiu %[dst], %[dst], 24 \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6","t7", "t8", "t9"
|
||||
);
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
|
||||
}
|
||||
|
||||
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"repl.ph $t3, 3 \n" // 0x00030003
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
|
||||
"rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
|
||||
@ -400,26 +393,24 @@ void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"sb $t6, 2(%[d]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" addiu %[d], %[d], 3 \n"
|
||||
"3: \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[src_stride] "+r" (src_stride),
|
||||
[d] "+r" (d),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
|
||||
[dst_width] "+r"(dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3",
|
||||
"t4", "t5", "t6"
|
||||
);
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
|
||||
}
|
||||
|
||||
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* d,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"repl.ph $t2, 3 \n" // 0x00030003
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
|
||||
"rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
|
||||
@ -452,25 +443,23 @@ void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"sb $t6, 2(%[d]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" addiu %[d], %[d], 3 \n"
|
||||
"3: \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[src_stride] "+r" (src_stride),
|
||||
[d] "+r" (d),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [src_ptr] "+r"(src_ptr), [src_stride] "+r"(src_stride), [d] "+r"(d),
|
||||
[dst_width] "+r"(dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3",
|
||||
"t4", "t5", "t6"
|
||||
);
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
|
||||
}
|
||||
|
||||
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
void ScaleRowDown38_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
@ -501,26 +490,24 @@ void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"bgez $t8, 1b \n"
|
||||
" addiu %[dst], %[dst], 12 \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [src_ptr] "+r"(src_ptr), [dst] "+r"(dst), [dst_width] "+r"(dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4",
|
||||
"t5", "t6", "t7", "t8"
|
||||
);
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
|
||||
}
|
||||
|
||||
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* t = src_ptr + stride;
|
||||
const int c = 0x2AAA;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
|
||||
"lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
|
||||
@ -554,18 +541,16 @@ void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" sb $t0, -3(%[dst_ptr]) \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst_ptr] "+r" (dst_ptr),
|
||||
[t] "+r" (t),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [c] "r" (c)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6"
|
||||
);
|
||||
: [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [t] "+r"(t),
|
||||
[dst_width] "+r"(dst_width)
|
||||
: [c] "r"(c)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6");
|
||||
}
|
||||
|
||||
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* s1 = src_ptr + stride;
|
||||
stride += stride;
|
||||
@ -573,11 +558,11 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
const int c1 = 0x1C71;
|
||||
const int c2 = 0x2AAA;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
|
||||
"lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
|
||||
@ -624,15 +609,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" sb $t0, -3(%[dst_ptr]) \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst_ptr] "+r" (dst_ptr),
|
||||
[s1] "+r" (s1),
|
||||
[s2] "+r" (s2),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [c1] "r" (c1), [c2] "r" (c2)
|
||||
: "t0", "t1", "t2", "t3", "t4",
|
||||
"t5", "t6", "t7", "t8"
|
||||
);
|
||||
: [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1),
|
||||
[s2] "+r"(s2), [dst_width] "+r"(dst_width)
|
||||
: [c1] "r"(c1), [c2] "r"(c2)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
@ -641,4 +621,3 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
@ -23,8 +23,10 @@ extern "C" {
|
||||
// Provided by Fritz Koenig
|
||||
|
||||
// Read 32x1, throw away even pixels, and write 16x1.
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
@ -43,8 +45,10 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Read 32x1 average down and write 16x1.
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -66,8 +70,10 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Read 32x2 average down and write 16x1.
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %0 \n"
|
||||
@ -95,8 +101,10 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -113,12 +121,14 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
const uint8* src_ptr1 = src_ptr + src_stride;
|
||||
const uint8* src_ptr2 = src_ptr + src_stride * 2;
|
||||
const uint8* src_ptr3 = src_ptr + src_stride * 3;
|
||||
asm volatile (
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load up 16x4
|
||||
@ -155,7 +165,8 @@ asm volatile (
|
||||
// Point samples 32 pixels to 24 pixels.
|
||||
void ScaleRowDown34_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -175,7 +186,8 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #3 \n"
|
||||
"add %3, %0 \n"
|
||||
@ -234,7 +246,8 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"vmov.u8 d24, #3 \n"
|
||||
"add %3, %0 \n"
|
||||
@ -274,21 +287,19 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||
}
|
||||
|
||||
#define HAS_SCALEROWDOWN38_NEON
|
||||
// Constant tables for the 3/8 ("38") row scalers. Each table appears twice
// below: once in its pre-clang-format layout and once reformatted; the values
// are identical in both.
//
// kShuf38      - 16-entry byte index table. The first 12 entries
//                (0, 3, 6, 8, 11, 14, ...) select three bytes out of every
//                eight across a 32-byte span; the last four are zero padding.
// kShuf38_2    - 16-entry byte index table with 12 meaningful entries and
//                four zero pads. NOTE(review): presumably re-interleaves
//                partial results in the box variants - confirm against the
//                asm that loads it.
// kMult38_Div6 - eight 16-bit lanes, each 65536 / 12.
// kMult38_Div9 - eight 16-bit lanes, each 65536 / 18.
// NOTE(review): the names say Div6/Div9 while the constants divide by 12/18;
// presumably the consuming multiply doubles its product - confirm.
static uvec8 kShuf38 =
|
||||
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
|
||||
static uvec8 kShuf38_2 =
|
||||
{ 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
|
||||
static vec16 kMult38_Div6 =
|
||||
{ 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
|
||||
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
|
||||
static vec16 kMult38_Div9 =
|
||||
{ 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
|
||||
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
|
||||
static uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0};
|
||||
static uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12,
|
||||
18, 6, 14, 19, 0, 0, 0, 0};
|
||||
static vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
|
||||
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12};
|
||||
static vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
|
||||
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18};
|
||||
|
||||
// 32 -> 12
|
||||
void ScaleRowDown38_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q3}, [%3] \n"
|
||||
@ -314,7 +325,8 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
|
||||
// 32x3 -> 12x1
|
||||
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
const uint8* src_ptr1 = src_ptr + src_stride * 2;
|
||||
|
||||
asm volatile (
|
||||
@ -433,7 +445,8 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
// 32x2 -> 12x1
|
||||
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
MEMACCESS(4)
|
||||
"vld1.16 {q13}, [%4] \n"
|
||||
@ -530,8 +543,11 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr,
|
||||
int src_width,
|
||||
int src_height) {
|
||||
const uint8* src_tmp;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -563,6 +579,7 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate fewer load instructions for
|
||||
// the x/dx stepping
|
||||
#define LOAD2_DATA8_LANE(n) \
|
||||
@ -571,13 +588,17 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
// The NEON version mimics this formula (from row_common.cc):
|
||||
// #define BLENDER(a, b, f) (uint8)((int)(a) +
|
||||
// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int dx_offset[4] = {0, 1, 2, 3};
|
||||
int* tmp = dx_offset;
|
||||
const uint8* src_tmp = src_ptr;
|
||||
@ -640,8 +661,10 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
|
||||
// 16x2 -> 16x1
|
||||
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
int dst_width, int source_y_fraction) {
|
||||
const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
int dst_width,
|
||||
int source_y_fraction) {
|
||||
asm volatile (
|
||||
"cmp %4, #0 \n"
|
||||
"beq 100f \n"
|
||||
@ -737,8 +760,10 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
@ -760,8 +785,10 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -788,8 +815,10 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %1, %0 \n"
|
||||
@ -829,8 +858,11 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
// Reads 4 pixels at a time.
|
||||
// Alignment requirement: src_argb 4 byte aligned.
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx, uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"mov r12, %3, lsl #2 \n"
|
||||
"1: \n"
|
||||
@ -856,9 +888,11 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
|
||||
// Reads 4 pixels at a time.
|
||||
// Alignment requirement: src_argb 4 byte aligned.
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"mov r12, %4, lsl #2 \n"
|
||||
"add %1, %1, %0 \n"
|
||||
@ -902,17 +936,22 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate fewer load instructions for
|
||||
// the x/dx stepping
|
||||
// LOAD1_DATA32_LANE(dn, n): asm fragment that loads one 32-bit element into
// lane n of register dn. Steps, in operand terms:
//   %5 = %3 >> 16          (logical shift right)
//   %6 = %1 + (%5 << 2)    (address of the 4-byte element)
//   %3 += %4
//   vld1.32 loads lane n of dn from [%6]
// NOTE(review): presumably %1 is the source pointer, %3 a 16.16 fixed-point x
// and %4 its step dx - confirm against the operand list of the asm that uses
// this macro. The macro appears twice below (old and clang-formatted layout).
#define LOAD1_DATA32_LANE(dn, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"vld1.32 {"#dn"["#n"]}, [%6] \n"
|
||||
#define LOAD1_DATA32_LANE(dn, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"vld1.32 {" #dn "[" #n "]}, [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int tmp;
|
||||
const uint8* src_tmp = src_argb;
|
||||
asm volatile (
|
||||
@ -944,17 +983,22 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
#undef LOAD1_DATA32_LANE
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate fewer load instructions for
|
||||
// the x/dx stepping
|
||||
// LOAD2_DATA32_LANE(dn1, dn2, n): asm fragment that loads two consecutive
// 32-bit elements, placing the first in lane n of dn1 and the second in lane
// n of dn2 (vld2.32 de-interleaving load). Steps, in operand terms:
//   %5 = %3 >> 16          (logical shift right)
//   %6 = %1 + (%5 << 2)    (address of the first 4-byte element)
//   %3 += %4
//   vld2.32 loads lane n of dn1 and dn2 from [%6]
// NOTE(review): presumably %1 is the source pointer, %3 a 16.16 fixed-point x
// and %4 its step dx - confirm against the operand list of the asm that uses
// this macro. The macro appears twice below (old and clang-formatted layout).
#define LOAD2_DATA32_LANE(dn1, dn2, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"vld2.32 {"#dn1"["#n"], "#dn2"["#n"]}, [%6] \n"
|
||||
#define LOAD2_DATA32_LANE(dn1, dn2, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int dx_offset[4] = {0, 1, 2, 3};
|
||||
int* tmp = dx_offset;
|
||||
const uint8* src_tmp = src_argb;
|
||||
|
||||
@ -21,8 +21,10 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
// Read 32x1, throw away even pixels, and write 16x1.
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
// load even pixels into v0, odd into v1
|
||||
@ -41,8 +43,10 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Read 32x1 average down and write 16x1.
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Linear_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -64,8 +68,10 @@ void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
}
|
||||
|
||||
// Read 32x2 average down and write 16x1.
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %1, %0 \n"
|
||||
@ -93,8 +99,10 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -111,12 +119,14 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
const uint8* src_ptr1 = src_ptr + src_stride;
|
||||
const uint8* src_ptr2 = src_ptr + src_stride * 2;
|
||||
const uint8* src_ptr3 = src_ptr + src_stride * 3;
|
||||
asm volatile (
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load up 16x4
|
||||
@ -152,7 +162,8 @@ asm volatile (
|
||||
// Point samples 32 pixels to 24 pixels.
|
||||
void ScaleRowDown34_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -172,7 +183,8 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"movi v20.8b, #3 \n"
|
||||
"add %3, %3, %0 \n"
|
||||
@ -232,7 +244,8 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
||||
|
||||
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"movi v20.8b, #3 \n"
|
||||
"add %3, %3, %0 \n"
|
||||
@ -273,21 +286,19 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
static uvec8 kShuf38 =
|
||||
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
|
||||
static uvec8 kShuf38_2 =
|
||||
{ 0, 16, 32, 2, 18, 33, 4, 20, 34, 6, 22, 35, 0, 0, 0, 0 };
|
||||
static vec16 kMult38_Div6 =
|
||||
{ 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
|
||||
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
|
||||
static vec16 kMult38_Div9 =
|
||||
{ 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
|
||||
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
|
||||
static uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0};
|
||||
static uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20,
|
||||
34, 6, 22, 35, 0, 0, 0, 0};
|
||||
static vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
|
||||
65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12};
|
||||
static vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
|
||||
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18};
|
||||
|
||||
// 32 -> 12
|
||||
void ScaleRowDown38_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"ld1 {v3.16b}, [%3] \n"
|
||||
@ -312,7 +323,8 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
|
||||
// 32x3 -> 12x1
|
||||
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
const uint8* src_ptr1 = src_ptr + src_stride * 2;
|
||||
ptrdiff_t tmp_src_stride = src_stride;
|
||||
|
||||
@ -441,7 +453,8 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
// 32x2 -> 12x1
|
||||
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
uint8* dst_ptr,
|
||||
int dst_width) {
|
||||
// TODO(fbarchard): use src_stride directly for clang 3.5+.
|
||||
ptrdiff_t tmp_src_stride = src_stride;
|
||||
asm volatile (
|
||||
@ -545,8 +558,11 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint16* dst_ptr, int src_width, int src_height) {
|
||||
void ScaleAddRows_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint16* dst_ptr,
|
||||
int src_width,
|
||||
int src_height) {
|
||||
const uint8* src_tmp;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -578,27 +594,32 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate less load instructions for
|
||||
// the x/dx stepping
|
||||
#define LOAD2_DATA8_LANE(n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld2 {v4.b, v5.b}["#n"], [%6] \n"
|
||||
#define LOAD2_DATA8_LANE(n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld2 {v4.b, v5.b}[" #n "], [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
// The NEON version mimics this formula (from row_common.cc):
|
||||
// #define BLENDER(a, b, f) (uint8)((int)(a) +
|
||||
// ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
|
||||
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleFilterCols_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int dx_offset[4] = {0, 1, 2, 3};
|
||||
int* tmp = dx_offset;
|
||||
const uint8* src_tmp = src_ptr;
|
||||
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64) x;
|
||||
int64 dx64 = (int64) dx;
|
||||
int64 dst_width64 = (int64)dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64)x;
|
||||
int64 dx64 = (int64)dx;
|
||||
asm volatile (
|
||||
"dup v0.4s, %w3 \n" // x
|
||||
"dup v1.4s, %w4 \n" // dx
|
||||
@ -658,9 +679,11 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
|
||||
|
||||
// 16x2 -> 16x1
|
||||
void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
int dst_width, int source_y_fraction) {
|
||||
int y_fraction = 256 - source_y_fraction;
|
||||
const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
int dst_width,
|
||||
int source_y_fraction) {
|
||||
int y_fraction = 256 - source_y_fraction;
|
||||
asm volatile (
|
||||
"cmp %w4, #0 \n"
|
||||
"b.eq 100f \n"
|
||||
@ -756,8 +779,10 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleARGBRowDown2_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
@ -779,8 +804,10 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS (0)
|
||||
@ -806,8 +833,10 @@ void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %1, %0 \n"
|
||||
@ -843,8 +872,11 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
|
||||
// Reads 4 pixels at a time.
|
||||
// Alignment requirement: src_argb 4 byte aligned.
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_stepx, uint8* dst_argb, int dst_width) {
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
@ -871,9 +903,11 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
// Alignment requirement: src_argb 4 byte aligned.
|
||||
// TODO(Yang Zhang): Might be worth another optimization pass in future.
|
||||
// It could be upgraded to 8 pixels at a time to start with.
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb,
|
||||
ptrdiff_t src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_argb, int dst_width) {
|
||||
uint8* dst_argb,
|
||||
int dst_width) {
|
||||
asm volatile (
|
||||
"add %1, %1, %0 \n"
|
||||
"1: \n"
|
||||
@ -920,21 +954,26 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate less load instructions for
|
||||
// the x/dx stepping
|
||||
#define LOAD1_DATA32_LANE(vn, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld1 {"#vn".s}["#n"], [%6] \n"
|
||||
#define LOAD1_DATA32_LANE(vn, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld1 {" #vn ".s}[" #n "], [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
const uint8* src_tmp = src_argb;
|
||||
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64) x;
|
||||
int64 dx64 = (int64) dx;
|
||||
int64 dst_width64 = (int64)dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64)x;
|
||||
int64 dx64 = (int64)dx;
|
||||
int64 tmp64;
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
@ -965,23 +1004,28 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
|
||||
#undef LOAD1_DATA32_LANE
|
||||
|
||||
// clang-format off
|
||||
// TODO(Yang Zhang): Investigate less load instructions for
|
||||
// the x/dx stepping
|
||||
#define LOAD2_DATA32_LANE(vn1, vn2, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld2 {"#vn1".s, "#vn2".s}["#n"], [%6] \n"
|
||||
#define LOAD2_DATA32_LANE(vn1, vn2, n) \
|
||||
"lsr %5, %3, #16 \n" \
|
||||
"add %6, %1, %5, lsl #2 \n" \
|
||||
"add %3, %3, %4 \n" \
|
||||
MEMACCESS(6) \
|
||||
"ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n"
|
||||
// clang-format on
|
||||
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) {
|
||||
void ScaleARGBFilterCols_NEON(uint8* dst_argb,
|
||||
const uint8* src_argb,
|
||||
int dst_width,
|
||||
int x,
|
||||
int dx) {
|
||||
int dx_offset[4] = {0, 1, 2, 3};
|
||||
int* tmp = dx_offset;
|
||||
const uint8* src_tmp = src_argb;
|
||||
int64 dst_width64 = (int64) dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64) x;
|
||||
int64 dx64 = (int64) dx;
|
||||
int64 dst_width64 = (int64)dst_width; // Work around ios 64 bit warning.
|
||||
int64 x64 = (int64)x;
|
||||
int64 dx64 = (int64)dx;
|
||||
asm volatile (
|
||||
"dup v0.4s, %w3 \n" // x
|
||||
"dup v1.4s, %w4 \n" // dx
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,6 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -24,24 +23,24 @@ struct FourCCAliasEntry {
|
||||
};
|
||||
|
||||
static const struct FourCCAliasEntry kFourCCAliases[] = {
|
||||
{FOURCC_IYUV, FOURCC_I420},
|
||||
{FOURCC_YU12, FOURCC_I420},
|
||||
{FOURCC_YU16, FOURCC_I422},
|
||||
{FOURCC_YU24, FOURCC_I444},
|
||||
{FOURCC_YUYV, FOURCC_YUY2},
|
||||
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
|
||||
{FOURCC_HDYC, FOURCC_UYVY},
|
||||
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
|
||||
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
|
||||
{FOURCC_DMB1, FOURCC_MJPG},
|
||||
{FOURCC_BA81, FOURCC_BGGR}, // deprecated.
|
||||
{FOURCC_RGB3, FOURCC_RAW },
|
||||
{FOURCC_BGR3, FOURCC_24BG},
|
||||
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
|
||||
{FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
|
||||
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
|
||||
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
|
||||
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
|
||||
{FOURCC_IYUV, FOURCC_I420},
|
||||
{FOURCC_YU12, FOURCC_I420},
|
||||
{FOURCC_YU16, FOURCC_I422},
|
||||
{FOURCC_YU24, FOURCC_I444},
|
||||
{FOURCC_YUYV, FOURCC_YUY2},
|
||||
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
|
||||
{FOURCC_HDYC, FOURCC_UYVY},
|
||||
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
|
||||
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
|
||||
{FOURCC_DMB1, FOURCC_MJPG},
|
||||
{FOURCC_BA81, FOURCC_BGGR}, // deprecated.
|
||||
{FOURCC_RGB3, FOURCC_RAW},
|
||||
{FOURCC_BGR3, FOURCC_24BG},
|
||||
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
|
||||
{FOURCC_CM24, FOURCC_RAW}, // kCMPixelFormat_24RGB
|
||||
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
|
||||
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
|
||||
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
|
||||
};
|
||||
// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
|
||||
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
|
||||
@ -62,4 +61,3 @@ uint32 CanonicalFourCC(uint32 fourcc) {
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
@ -10,13 +10,13 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/convert.h"
|
||||
#include "libyuv/convert_argb.h"
|
||||
#include "libyuv/convert_from.h"
|
||||
#include "libyuv/convert_from_argb.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
@ -38,110 +38,103 @@ namespace libyuv {
|
||||
|
||||
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
|
||||
TEST_F(LibYUVColorTest, TESTNAME) { \
|
||||
const int kPixels = benchmark_width_ * benchmark_height_; \
|
||||
const int kHalfPixels = ((benchmark_width_ + 1) / 2) * \
|
||||
((benchmark_height_ + HS1) / HS); \
|
||||
align_buffer_page_end(orig_y, kPixels); \
|
||||
align_buffer_page_end(orig_u, kHalfPixels); \
|
||||
align_buffer_page_end(orig_v, kHalfPixels); \
|
||||
align_buffer_page_end(orig_pixels, kPixels * 4); \
|
||||
align_buffer_page_end(temp_y, kPixels); \
|
||||
align_buffer_page_end(temp_u, kHalfPixels); \
|
||||
align_buffer_page_end(temp_v, kHalfPixels); \
|
||||
align_buffer_page_end(dst_pixels_opt, kPixels * 4); \
|
||||
align_buffer_page_end(dst_pixels_c, kPixels * 4); \
|
||||
const int kPixels = benchmark_width_ * benchmark_height_; \
|
||||
const int kHalfPixels = \
|
||||
((benchmark_width_ + 1) / 2) * ((benchmark_height_ + HS1) / HS); \
|
||||
align_buffer_page_end(orig_y, kPixels); \
|
||||
align_buffer_page_end(orig_u, kHalfPixels); \
|
||||
align_buffer_page_end(orig_v, kHalfPixels); \
|
||||
align_buffer_page_end(orig_pixels, kPixels * 4); \
|
||||
align_buffer_page_end(temp_y, kPixels); \
|
||||
align_buffer_page_end(temp_u, kHalfPixels); \
|
||||
align_buffer_page_end(temp_v, kHalfPixels); \
|
||||
align_buffer_page_end(dst_pixels_opt, kPixels * 4); \
|
||||
align_buffer_page_end(dst_pixels_c, kPixels * 4); \
|
||||
\
|
||||
MemRandomize(orig_pixels, kPixels * 4); \
|
||||
MemRandomize(orig_y, kPixels); \
|
||||
MemRandomize(orig_u, kHalfPixels); \
|
||||
MemRandomize(orig_v, kHalfPixels); \
|
||||
MemRandomize(temp_y, kPixels); \
|
||||
MemRandomize(temp_u, kHalfPixels); \
|
||||
MemRandomize(temp_v, kHalfPixels); \
|
||||
MemRandomize(dst_pixels_opt, kPixels * 4); \
|
||||
MemRandomize(dst_pixels_c, kPixels * 4); \
|
||||
MemRandomize(orig_pixels, kPixels * 4); \
|
||||
MemRandomize(orig_y, kPixels); \
|
||||
MemRandomize(orig_u, kHalfPixels); \
|
||||
MemRandomize(orig_v, kHalfPixels); \
|
||||
MemRandomize(temp_y, kPixels); \
|
||||
MemRandomize(temp_u, kHalfPixels); \
|
||||
MemRandomize(temp_v, kHalfPixels); \
|
||||
MemRandomize(dst_pixels_opt, kPixels * 4); \
|
||||
MemRandomize(dst_pixels_c, kPixels * 4); \
|
||||
\
|
||||
/* The test is overall for color conversion matrix being reversible, so */ \
|
||||
/* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \
|
||||
uint8* p = orig_y; \
|
||||
for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \
|
||||
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[1] = r; \
|
||||
p[HN] = r; \
|
||||
p[HN + 1] = r; \
|
||||
p += 2; \
|
||||
/* The test is overall for color conversion matrix being reversible, so */ \
|
||||
/* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \
|
||||
uint8* p = orig_y; \
|
||||
for (int y = 0; y < benchmark_height_ - HS1; y += HS) { \
|
||||
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[1] = r; \
|
||||
p[HN] = r; \
|
||||
p[HN + 1] = r; \
|
||||
p += 2; \
|
||||
} \
|
||||
if (benchmark_width_ & 1) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[HN] = r; \
|
||||
p += 1; \
|
||||
} \
|
||||
p += HN; \
|
||||
} \
|
||||
if (benchmark_width_ & 1) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[HN] = r; \
|
||||
p += 1; \
|
||||
if ((benchmark_height_ & 1) && HS == 2) { \
|
||||
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[1] = r; \
|
||||
p += 2; \
|
||||
} \
|
||||
if (benchmark_width_ & 1) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p += 1; \
|
||||
} \
|
||||
} \
|
||||
p += HN; \
|
||||
} \
|
||||
if ((benchmark_height_ & 1) && HS == 2) { \
|
||||
for (int x = 0; x < benchmark_width_ - 1; x += 2) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p[1] = r; \
|
||||
p += 2; \
|
||||
/* Start with YUV converted to ARGB. */ \
|
||||
YUVTOARGB(orig_y, benchmark_width_, orig_u, (benchmark_width_ + 1) / 2, \
|
||||
orig_v, (benchmark_width_ + 1) / 2, orig_pixels, \
|
||||
benchmark_width_ * 4, benchmark_width_, benchmark_height_); \
|
||||
\
|
||||
ARGBTOYUV(orig_pixels, benchmark_width_ * 4, temp_y, benchmark_width_, \
|
||||
temp_u, (benchmark_width_ + 1) / 2, temp_v, \
|
||||
(benchmark_width_ + 1) / 2, benchmark_width_, \
|
||||
benchmark_height_); \
|
||||
\
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
YUVTOARGB(temp_y, benchmark_width_, temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, dst_pixels_c, \
|
||||
benchmark_width_ * 4, benchmark_width_, benchmark_height_); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
\
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
YUVTOARGB(temp_y, benchmark_width_, temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, dst_pixels_opt, \
|
||||
benchmark_width_ * 4, benchmark_width_, benchmark_height_); \
|
||||
} \
|
||||
if (benchmark_width_ & 1) { \
|
||||
uint8 r = static_cast<uint8>(fastrand()); \
|
||||
p[0] = r; \
|
||||
p += 1; \
|
||||
/* Test C and SIMD match. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
|
||||
} \
|
||||
/* Test SIMD is close to original. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
EXPECT_NEAR(static_cast<int>(orig_pixels[i]), \
|
||||
static_cast<int>(dst_pixels_opt[i]), DIFF); \
|
||||
} \
|
||||
} \
|
||||
/* Start with YUV converted to ARGB. */ \
|
||||
YUVTOARGB(orig_y, benchmark_width_, \
|
||||
orig_u, (benchmark_width_ + 1) / 2, \
|
||||
orig_v, (benchmark_width_ + 1) / 2, \
|
||||
orig_pixels, benchmark_width_ * 4, \
|
||||
benchmark_width_, benchmark_height_); \
|
||||
\
|
||||
ARGBTOYUV(orig_pixels, benchmark_width_ * 4, \
|
||||
temp_y, benchmark_width_, \
|
||||
temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, \
|
||||
benchmark_width_, benchmark_height_); \
|
||||
\
|
||||
MaskCpuFlags(disable_cpu_flags_); \
|
||||
YUVTOARGB(temp_y, benchmark_width_, \
|
||||
temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, \
|
||||
dst_pixels_c, benchmark_width_ * 4, \
|
||||
benchmark_width_, benchmark_height_); \
|
||||
MaskCpuFlags(benchmark_cpu_info_); \
|
||||
\
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) { \
|
||||
YUVTOARGB(temp_y, benchmark_width_, \
|
||||
temp_u, (benchmark_width_ + 1) / 2, \
|
||||
temp_v, (benchmark_width_ + 1) / 2, \
|
||||
dst_pixels_opt, benchmark_width_ * 4, \
|
||||
benchmark_width_, benchmark_height_); \
|
||||
} \
|
||||
/* Test C and SIMD match. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
|
||||
} \
|
||||
/* Test SIMD is close to original. */ \
|
||||
for (int i = 0; i < kPixels * 4; ++i) { \
|
||||
EXPECT_NEAR(static_cast<int>(orig_pixels[i]), \
|
||||
static_cast<int>(dst_pixels_opt[i]), DIFF); \
|
||||
} \
|
||||
\
|
||||
free_aligned_buffer_page_end(orig_pixels); \
|
||||
free_aligned_buffer_page_end(orig_y); \
|
||||
free_aligned_buffer_page_end(orig_u); \
|
||||
free_aligned_buffer_page_end(orig_v); \
|
||||
free_aligned_buffer_page_end(temp_y); \
|
||||
free_aligned_buffer_page_end(temp_u); \
|
||||
free_aligned_buffer_page_end(temp_v); \
|
||||
free_aligned_buffer_page_end(dst_pixels_opt); \
|
||||
free_aligned_buffer_page_end(dst_pixels_c); \
|
||||
} \
|
||||
free_aligned_buffer_page_end(orig_pixels); \
|
||||
free_aligned_buffer_page_end(orig_y); \
|
||||
free_aligned_buffer_page_end(orig_u); \
|
||||
free_aligned_buffer_page_end(orig_v); \
|
||||
free_aligned_buffer_page_end(temp_y); \
|
||||
free_aligned_buffer_page_end(temp_u); \
|
||||
free_aligned_buffer_page_end(temp_v); \
|
||||
free_aligned_buffer_page_end(dst_pixels_opt); \
|
||||
free_aligned_buffer_page_end(dst_pixels_c); \
|
||||
}
|
||||
|
||||
TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL)
|
||||
TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL)
|
||||
@ -163,11 +156,8 @@ static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) {
|
||||
memset(orig_v, v, kHalfPixels);
|
||||
|
||||
/* YUV converted to ARGB. */
|
||||
I422ToARGB(orig_y, kWidth,
|
||||
orig_u, (kWidth + 1) / 2,
|
||||
orig_v, (kWidth + 1) / 2,
|
||||
orig_pixels, kWidth * 4,
|
||||
kWidth, kHeight);
|
||||
I422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
|
||||
orig_pixels, kWidth * 4, kWidth, kHeight);
|
||||
|
||||
*b = orig_pixels[0];
|
||||
*g = orig_pixels[1];
|
||||
@ -189,11 +179,8 @@ static void YUVJToRGB(int y, int u, int v, int* r, int* g, int* b) {
|
||||
memset(orig_v, v, kHalfPixels);
|
||||
|
||||
/* YUV converted to ARGB. */
|
||||
J422ToARGB(orig_y, kWidth,
|
||||
orig_u, (kWidth + 1) / 2,
|
||||
orig_v, (kWidth + 1) / 2,
|
||||
orig_pixels, kWidth * 4,
|
||||
kWidth, kHeight);
|
||||
J422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2,
|
||||
orig_pixels, kWidth * 4, kWidth, kHeight);
|
||||
|
||||
*b = orig_pixels[0];
|
||||
*g = orig_pixels[1];
|
||||
@ -248,7 +235,7 @@ static void YJToRGB(int y, int* r, int* g, int* b) {
|
||||
|
||||
#if defined(CLAMPMETHOD_IF)
|
||||
static int RoundToByte(float f) {
|
||||
int i = ROUND(f);
|
||||
int i = ROUND(f);
|
||||
if (i < 0) {
|
||||
i = 0;
|
||||
}
|
||||
@ -259,52 +246,61 @@ static int RoundToByte(float f) {
|
||||
}
|
||||
#elif defined(CLAMPMETHOD_TABLE)
|
||||
static const unsigned char clamptable[811] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
||||
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
|
||||
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
|
||||
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
|
||||
86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
|
||||
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
|
||||
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
|
||||
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,
|
||||
149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163,
|
||||
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178,
|
||||
179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193,
|
||||
194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
|
||||
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
|
||||
239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
|
||||
254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
|
||||
};
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
|
||||
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
|
||||
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
|
||||
69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
|
||||
84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
|
||||
99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
|
||||
114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,
|
||||
129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
|
||||
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158,
|
||||
159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
|
||||
174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
|
||||
189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203,
|
||||
204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
|
||||
219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
|
||||
234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
|
||||
249, 250, 251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
|
||||
255};
|
||||
|
||||
static int RoundToByte(float f) {
|
||||
return clamptable[ROUND(f) + 276];
|
||||
@ -317,7 +313,7 @@ static int RoundToByte(float f) {
|
||||
#elif defined(CLAMPMETHOD_MASK)
|
||||
static int RoundToByte(float f) {
|
||||
int i = ROUND(f);
|
||||
i = ((-(i) >> 31) & (i)); // clamp to 0.
|
||||
i = ((-(i) >> 31) & (i)); // clamp to 0.
|
||||
return (((255 - (i)) >> 31) | (i)) & 255; // clamp to 255.
|
||||
}
|
||||
#endif
|
||||
@ -433,7 +429,6 @@ TEST_F(LibYUVColorTest, TestGreyYUV) {
|
||||
EXPECT_EQ(130, g1);
|
||||
EXPECT_EQ(130, b1);
|
||||
|
||||
|
||||
for (int y = 0; y < 256; ++y) {
|
||||
YUVToRGBReference(y, 128, 128, &r0, &g0, &b0);
|
||||
YUVToRGB(y, 128, 128, &r1, &g1, &b1);
|
||||
@ -477,7 +472,17 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
|
||||
}
|
||||
|
||||
TEST_F(LibYUVColorTest, TestFullYUV) {
|
||||
int rh[256] = { 0, }, gh[256] = { 0, }, bh[256] = { 0, };
|
||||
int rh[256] =
|
||||
{
|
||||
0,
|
||||
},
|
||||
gh[256] =
|
||||
{
|
||||
0,
|
||||
},
|
||||
bh[256] = {
|
||||
0,
|
||||
};
|
||||
for (int u = 0; u < 256; ++u) {
|
||||
for (int v = 0; v < 256; ++v) {
|
||||
for (int y2 = 0; y2 < 256; ++y2) {
|
||||
@ -498,7 +503,17 @@ TEST_F(LibYUVColorTest, TestFullYUV) {
|
||||
}
|
||||
|
||||
TEST_F(LibYUVColorTest, TestFullYUVJ) {
|
||||
int rh[256] = { 0, }, gh[256] = { 0, }, bh[256] = { 0, };
|
||||
int rh[256] =
|
||||
{
|
||||
0,
|
||||
},
|
||||
gh[256] =
|
||||
{
|
||||
0,
|
||||
},
|
||||
bh[256] = {
|
||||
0,
|
||||
};
|
||||
for (int u = 0; u < 256; ++u) {
|
||||
for (int v = 0; v < 256; ++v) {
|
||||
for (int y2 = 0; y2 < 256; ++y2) {
|
||||
|
||||
@ -36,7 +36,8 @@ TEST_F(LibYUVBaseTest, Djb2_Test) {
|
||||
align_buffer_page_end(src_a, kMaxTest);
|
||||
align_buffer_page_end(src_b, kMaxTest);
|
||||
|
||||
const char* fox = "The quick brown fox jumps over the lazy dog"
|
||||
const char* fox =
|
||||
"The quick brown fox jumps over the lazy dog"
|
||||
" and feels as if he were in the seventh heaven of typography"
|
||||
" together with Hermann Zapf";
|
||||
uint32 foxhash = HashDjb2(reinterpret_cast<const uint8*>(fox), 131, 5381);
|
||||
@ -155,19 +156,19 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Opt) {
|
||||
}
|
||||
|
||||
src_a[0] = 0;
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc);
|
||||
src_a[0] = 255;
|
||||
src_a[3] = 0;
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc);
|
||||
src_a[3] = 255;
|
||||
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
}
|
||||
EXPECT_EQ(0u, fourcc);
|
||||
|
||||
@ -183,19 +184,19 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) {
|
||||
}
|
||||
|
||||
src_a[0 + 1] = 0;
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_BGRA), fourcc);
|
||||
src_a[0 + 1] = 255;
|
||||
src_a[3 + 1] = 0;
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
EXPECT_EQ(static_cast<uint32>(libyuv::FOURCC_ARGB), fourcc);
|
||||
src_a[3 + 1] = 255;
|
||||
|
||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4,
|
||||
benchmark_width_, benchmark_height_);
|
||||
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
|
||||
benchmark_height_);
|
||||
}
|
||||
EXPECT_EQ(0u, fourcc);
|
||||
|
||||
@ -220,8 +221,9 @@ TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) {
|
||||
memset(src_a, 0, kMaxWidth);
|
||||
memset(src_b, 0, kMaxWidth);
|
||||
|
||||
int count = benchmark_iterations_ *
|
||||
((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
|
||||
int count =
|
||||
benchmark_iterations_ *
|
||||
((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth);
|
||||
}
|
||||
@ -284,8 +286,7 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Opt) {
|
||||
|
||||
double opt_time = get_time();
|
||||
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||
CalcFramePsnr(src_a, benchmark_width_,
|
||||
src_b, benchmark_width_,
|
||||
CalcFramePsnr(src_a, benchmark_width_, src_b, benchmark_width_,
|
||||
benchmark_width_, benchmark_height_);
|
||||
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
||||
@ -309,8 +310,7 @@ TEST_F(LibYUVBaseTest, BenchmarkPsnr_Unaligned) {
|
||||
|
||||
double opt_time = get_time();
|
||||
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||
CalcFramePsnr(src_a + 1, benchmark_width_,
|
||||
src_b, benchmark_width_,
|
||||
CalcFramePsnr(src_a + 1, benchmark_width_, src_b, benchmark_width_,
|
||||
benchmark_width_, benchmark_height_);
|
||||
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
||||
@ -335,24 +335,24 @@ TEST_F(LibYUVBaseTest, Psnr) {
|
||||
|
||||
double err;
|
||||
err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
EXPECT_EQ(err, kMaxPsnr);
|
||||
|
||||
memset(src_a, 255, kSrcPlaneSize);
|
||||
|
||||
err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
EXPECT_EQ(err, 0.0);
|
||||
|
||||
memset(src_a, 1, kSrcPlaneSize);
|
||||
|
||||
err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
EXPECT_GT(err, 48.0);
|
||||
EXPECT_LT(err, 49.0);
|
||||
@ -362,8 +362,8 @@ TEST_F(LibYUVBaseTest, Psnr) {
|
||||
}
|
||||
|
||||
err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
EXPECT_GT(err, 2.0);
|
||||
if (kSrcWidth * kSrcHeight >= 256) {
|
||||
@ -384,14 +384,14 @@ TEST_F(LibYUVBaseTest, Psnr) {
|
||||
double c_err, opt_err;
|
||||
|
||||
c_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info_);
|
||||
|
||||
opt_err = CalcFramePsnr(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
EXPECT_EQ(opt_err, c_err);
|
||||
|
||||
@ -411,8 +411,7 @@ TEST_F(LibYUVBaseTest, DISABLED_BenchmarkSsim_Opt) {
|
||||
|
||||
double opt_time = get_time();
|
||||
for (int i = 0; i < benchmark_iterations_; ++i)
|
||||
CalcFrameSsim(src_a, benchmark_width_,
|
||||
src_b, benchmark_width_,
|
||||
CalcFrameSsim(src_a, benchmark_width_, src_b, benchmark_width_,
|
||||
benchmark_width_, benchmark_height_);
|
||||
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
||||
@ -435,14 +434,14 @@ TEST_F(LibYUVBaseTest, Ssim) {
|
||||
memset(src_a, 0, kSrcPlaneSize);
|
||||
memset(src_b, 0, kSrcPlaneSize);
|
||||
|
||||
if (kSrcWidth <=8 || kSrcHeight <= 8) {
|
||||
if (kSrcWidth <= 8 || kSrcHeight <= 8) {
|
||||
printf("warning - Ssim size too small. Testing function executes.\n");
|
||||
}
|
||||
|
||||
double err;
|
||||
err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
if (kSrcWidth > 8 && kSrcHeight > 8) {
|
||||
EXPECT_EQ(err, 1.0);
|
||||
@ -451,8 +450,8 @@ TEST_F(LibYUVBaseTest, Ssim) {
|
||||
memset(src_a, 255, kSrcPlaneSize);
|
||||
|
||||
err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
if (kSrcWidth > 8 && kSrcHeight > 8) {
|
||||
EXPECT_LT(err, 0.0001);
|
||||
@ -461,8 +460,8 @@ TEST_F(LibYUVBaseTest, Ssim) {
|
||||
memset(src_a, 1, kSrcPlaneSize);
|
||||
|
||||
err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
if (kSrcWidth > 8 && kSrcHeight > 8) {
|
||||
EXPECT_GT(err, 0.0001);
|
||||
@ -474,8 +473,8 @@ TEST_F(LibYUVBaseTest, Ssim) {
|
||||
}
|
||||
|
||||
err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
if (kSrcWidth > 8 && kSrcHeight > 8) {
|
||||
EXPECT_GT(err, 0.0);
|
||||
@ -493,14 +492,14 @@ TEST_F(LibYUVBaseTest, Ssim) {
|
||||
double c_err, opt_err;
|
||||
|
||||
c_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info_);
|
||||
|
||||
opt_err = CalcFrameSsim(src_a + kSrcStride * b + b, kSrcStride,
|
||||
src_b + kSrcStride * b + b, kSrcStride,
|
||||
kSrcWidth, kSrcHeight);
|
||||
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
|
||||
kSrcHeight);
|
||||
|
||||
if (kSrcWidth > 8 && kSrcHeight > 8) {
|
||||
EXPECT_EQ(opt_err, c_err);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -11,10 +11,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/version.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
@ -64,19 +64,20 @@ TEST_F(LibYUVBaseTest, TestCpuCompilerEnabled) {
|
||||
printf("x64 build\n");
|
||||
#endif
|
||||
#ifdef _MSC_VER
|
||||
printf("_MSC_VER %d\n", _MSC_VER);
|
||||
printf("_MSC_VER %d\n", _MSC_VER);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(GCC_HAS_AVX2) || \
|
||||
defined(CLANG_HAS_AVX2) || defined(VISUALC_HAS_AVX2))
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(GCC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
|
||||
defined(VISUALC_HAS_AVX2))
|
||||
printf("Has AVX2 1\n");
|
||||
#else
|
||||
printf("Has AVX2 0\n");
|
||||
// If compiler does not support AVX2, the following function not expected:
|
||||
// If compiler does not support AVX2, the following function not expected:
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || \
|
||||
defined(_M_IX86) || defined(_M_X64)
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
||||
defined(_M_X64)
|
||||
TEST_F(LibYUVBaseTest, TestCpuId) {
|
||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||
if (has_x86) {
|
||||
@ -110,8 +111,8 @@ TEST_F(LibYUVBaseTest, TestCpuId) {
|
||||
CpuId(1, 0, cpu_info);
|
||||
int family = ((cpu_info[0] >> 8) & 0x0f) | ((cpu_info[0] >> 16) & 0xff0);
|
||||
int model = ((cpu_info[0] >> 4) & 0x0f) | ((cpu_info[0] >> 12) & 0xf0);
|
||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family,
|
||||
model, model);
|
||||
printf("Cpu Family %d (0x%x), Model %d (0x%x)\n", family, family, model,
|
||||
model);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -12,11 +12,11 @@
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/scale.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -10,14 +10,16 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/rotate_argb.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
void TestRotateBpp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
void TestRotateBpp(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
@ -51,26 +53,22 @@ void TestRotateBpp(int src_width, int src_height,
|
||||
|
||||
if (kBpp == 1) {
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
RotatePlane(src_argb, src_stride_argb,
|
||||
dst_argb_c, dst_stride_argb,
|
||||
RotatePlane(src_argb, src_stride_argb, dst_argb_c, dst_stride_argb,
|
||||
src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
RotatePlane(src_argb, src_stride_argb,
|
||||
dst_argb_opt, dst_stride_argb,
|
||||
RotatePlane(src_argb, src_stride_argb, dst_argb_opt, dst_stride_argb,
|
||||
src_width, src_height, mode);
|
||||
}
|
||||
} else if (kBpp == 4) {
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
ARGBRotate(src_argb, src_stride_argb,
|
||||
dst_argb_c, dst_stride_argb,
|
||||
ARGBRotate(src_argb, src_stride_argb, dst_argb_c, dst_stride_argb,
|
||||
src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
ARGBRotate(src_argb, src_stride_argb,
|
||||
dst_argb_opt, dst_stride_argb,
|
||||
ARGBRotate(src_argb, src_stride_argb, dst_argb_opt, dst_stride_argb,
|
||||
src_width, src_height, mode);
|
||||
}
|
||||
}
|
||||
@ -85,112 +83,104 @@ void TestRotateBpp(int src_width, int src_height,
|
||||
free_aligned_buffer_page_end(src_argb);
|
||||
}
|
||||
|
||||
static void ARGBTestRotate(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void ARGBTestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
TestRotateBpp(src_width, src_height,
|
||||
dst_width, dst_height,
|
||||
mode, benchmark_iterations,
|
||||
disable_cpu_flags, benchmark_cpu_info, 4);
|
||||
TestRotateBpp(src_width, src_height, dst_width, dst_height, mode,
|
||||
benchmark_iterations, disable_cpu_flags, benchmark_cpu_info, 4);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, ARGBRotate0_Opt) {
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate0, benchmark_iterations_,
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, ARGBRotate90_Opt) {
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate90, benchmark_iterations_,
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, ARGBRotate180_Opt) {
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate180, benchmark_iterations_,
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, ARGBRotate270_Opt) {
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate270, benchmark_iterations_,
|
||||
ARGBTestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void TestRotatePlane(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void TestRotatePlane(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
TestRotateBpp(src_width, src_height,
|
||||
dst_width, dst_height,
|
||||
mode, benchmark_iterations,
|
||||
disable_cpu_flags, benchmark_cpu_info, 1);
|
||||
TestRotateBpp(src_width, src_height, dst_width, dst_height, mode,
|
||||
benchmark_iterations, disable_cpu_flags, benchmark_cpu_info, 1);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane0_Opt) {
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane90_Opt) {
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane180_Opt) {
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, RotatePlane270_Opt) {
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
TestRotatePlane(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane0_Odd) {
|
||||
TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane90_Odd) {
|
||||
TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane180_Odd) {
|
||||
TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_RotatePlane270_Odd) {
|
||||
TestRotatePlane(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
@ -10,17 +10,20 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/rotate.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
static void I420TestRotate(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void I420TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
@ -50,26 +53,21 @@ static void I420TestRotate(int src_width, int src_height,
|
||||
memset(dst_i420_opt, 3, dst_i420_size);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I420Rotate(src_i420, src_width,
|
||||
src_i420 + src_i420_y_size, (src_width + 1) / 2,
|
||||
src_i420 + src_i420_y_size + src_i420_uv_size, (src_width + 1) / 2,
|
||||
dst_i420_c, dst_width,
|
||||
I420Rotate(src_i420, src_width, src_i420 + src_i420_y_size,
|
||||
(src_width + 1) / 2, src_i420 + src_i420_y_size + src_i420_uv_size,
|
||||
(src_width + 1) / 2, dst_i420_c, dst_width,
|
||||
dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2,
|
||||
dst_i420_c + dst_i420_y_size + dst_i420_uv_size,
|
||||
(dst_width + 1) / 2,
|
||||
src_width, src_height, mode);
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
I420Rotate(src_i420, src_width,
|
||||
src_i420 + src_i420_y_size, (src_width + 1) / 2,
|
||||
src_i420 + src_i420_y_size + src_i420_uv_size,
|
||||
(src_width + 1) / 2,
|
||||
dst_i420_opt, dst_width,
|
||||
dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2,
|
||||
dst_i420_opt + dst_i420_y_size + dst_i420_uv_size,
|
||||
(dst_width + 1) / 2,
|
||||
src_width, src_height, mode);
|
||||
I420Rotate(
|
||||
src_i420, src_width, src_i420 + src_i420_y_size, (src_width + 1) / 2,
|
||||
src_i420 + src_i420_y_size + src_i420_uv_size, (src_width + 1) / 2,
|
||||
dst_i420_opt, dst_width, dst_i420_opt + dst_i420_y_size,
|
||||
(dst_width + 1) / 2, dst_i420_opt + dst_i420_y_size + dst_i420_uv_size,
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
@ -83,30 +81,26 @@ static void I420TestRotate(int src_width, int src_height,
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I420Rotate0_Opt) {
|
||||
I420TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate0, benchmark_iterations_,
|
||||
I420TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I420Rotate90_Opt) {
|
||||
I420TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate90, benchmark_iterations_,
|
||||
I420TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I420Rotate180_Opt) {
|
||||
I420TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate180, benchmark_iterations_,
|
||||
I420TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I420Rotate270_Opt) {
|
||||
I420TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate270, benchmark_iterations_,
|
||||
I420TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
@ -115,37 +109,40 @@ TEST_F(LibYUVRotateTest, I420Rotate270_Opt) {
|
||||
// tested by passing an odd width command line or environment variable.
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate0_Odd) {
|
||||
I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate90_Odd) {
|
||||
I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate180_Odd) {
|
||||
I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
|
||||
I420TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void NV12TestRotate(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void NV12TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
@ -176,23 +173,19 @@ static void NV12TestRotate(int src_width, int src_height,
|
||||
memset(dst_i420_opt, 3, dst_i420_size);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
NV12ToI420Rotate(src_nv12, src_width,
|
||||
src_nv12 + src_nv12_y_size, (src_width + 1) & ~1,
|
||||
dst_i420_c, dst_width,
|
||||
NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size,
|
||||
(src_width + 1) & ~1, dst_i420_c, dst_width,
|
||||
dst_i420_c + dst_i420_y_size, (dst_width + 1) / 2,
|
||||
dst_i420_c + dst_i420_y_size + dst_i420_uv_size,
|
||||
(dst_width + 1) / 2,
|
||||
src_width, src_height, mode);
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
NV12ToI420Rotate(src_nv12, src_width,
|
||||
src_nv12 + src_nv12_y_size, (src_width + 1) & ~1,
|
||||
dst_i420_opt, dst_width,
|
||||
NV12ToI420Rotate(src_nv12, src_width, src_nv12 + src_nv12_y_size,
|
||||
(src_width + 1) & ~1, dst_i420_opt, dst_width,
|
||||
dst_i420_opt + dst_i420_y_size, (dst_width + 1) / 2,
|
||||
dst_i420_opt + dst_i420_y_size + dst_i420_uv_size,
|
||||
(dst_width + 1) / 2,
|
||||
src_width, src_height, mode);
|
||||
(dst_width + 1) / 2, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
@ -206,91 +199,79 @@ static void NV12TestRotate(int src_width, int src_height,
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate0_Opt) {
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate0, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate90_Opt) {
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate90, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate180_Opt) {
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate180, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate270_Opt) {
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate270, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate0_Odd) {
|
||||
NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate0,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate90_Odd) {
|
||||
NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate90,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate180_Odd) {
|
||||
NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_width_ - 3, benchmark_height_ - 1, kRotate180,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_NV12Rotate270_Odd) {
|
||||
NV12TestRotate(benchmark_width_ - 3, benchmark_height_ - 1,
|
||||
benchmark_height_ - 1, benchmark_width_ - 3,
|
||||
kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
benchmark_height_ - 1, benchmark_width_ - 3, kRotate270,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate0_Invert) {
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate0, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate0, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate90_Invert) {
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate90, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate90, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate180_Invert) {
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_,
|
||||
benchmark_width_, benchmark_height_,
|
||||
kRotate180, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_width_,
|
||||
benchmark_height_, kRotate180, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_,
|
||||
benchmark_height_, benchmark_width_,
|
||||
kRotate270, benchmark_iterations_,
|
||||
NV12TestRotate(benchmark_width_, -benchmark_height_, benchmark_height_,
|
||||
benchmark_width_, kRotate270, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
@ -11,11 +11,11 @@
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/convert_argb.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/scale_argb.h"
|
||||
#include "libyuv/video_common.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
@ -23,18 +23,22 @@ namespace libyuv {
|
||||
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int ARGBTestFilter(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode f, int benchmark_iterations,
|
||||
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||
static int ARGBTestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int i, j;
|
||||
const int b = 0; // 128 to test for padding/stride.
|
||||
int64 src_argb_plane_size = (Abs(src_width) + b * 2) *
|
||||
(Abs(src_height) + b * 2) * 4LL;
|
||||
int64 src_argb_plane_size =
|
||||
(Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4LL;
|
||||
int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
|
||||
|
||||
align_buffer_page_end(src_argb, src_argb_plane_size);
|
||||
@ -59,21 +63,18 @@ static int ARGBTestFilter(int src_width, int src_height,
|
||||
// Warm up both versions for consistent benchmarks.
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
|
||||
dst_width, dst_height, f);
|
||||
src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
|
||||
dst_stride_argb, dst_width, dst_height, f);
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb_opt + (dst_stride_argb * b) + b * 4, dst_stride_argb,
|
||||
dst_width, dst_height, f);
|
||||
src_width, src_height, dst_argb_opt + (dst_stride_argb * b) + b * 4,
|
||||
dst_stride_argb, dst_width, dst_height, f);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
double c_time = get_time();
|
||||
ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
|
||||
dst_width, dst_height, f);
|
||||
src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
|
||||
dst_stride_argb, dst_width, dst_height, f);
|
||||
|
||||
c_time = (get_time() - c_time);
|
||||
|
||||
@ -88,8 +89,8 @@ static int ARGBTestFilter(int src_width, int src_height,
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||
|
||||
// Report performance of C vs OPT
|
||||
printf("filter %d - %8d us C - %8d us OPT\n",
|
||||
f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
printf("filter %d - %8d us C - %8d us OPT\n", f,
|
||||
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
|
||||
// C version may be a little off from the optimized. Order of
|
||||
// operations may introduce rounding somewhere. So do a difference
|
||||
@ -115,10 +116,14 @@ static int ARGBTestFilter(int src_width, int src_height,
|
||||
static const int kTileX = 8;
|
||||
static const int kTileY = 8;
|
||||
|
||||
static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
static int TileARGBScale(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode filtering) {
|
||||
for (int y = 0; y < dst_height; y += kTileY) {
|
||||
for (int x = 0; x < dst_width; x += kTileX) {
|
||||
@ -130,11 +135,9 @@ static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
if (y + clip_height > dst_height) {
|
||||
clip_height = dst_height - y;
|
||||
}
|
||||
int r = ARGBScaleClip(src_argb, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height,
|
||||
x, y, clip_width, clip_height, filtering);
|
||||
int r = ARGBScaleClip(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height, x,
|
||||
y, clip_width, clip_height, filtering);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
@ -143,16 +146,19 @@ static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode f, int benchmark_iterations) {
|
||||
static int ARGBClipTestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int b = 128;
|
||||
int64 src_argb_plane_size = (Abs(src_width) + b * 2) *
|
||||
(Abs(src_height) + b * 2) * 4;
|
||||
int64 src_argb_plane_size =
|
||||
(Abs(src_width) + b * 2) * (Abs(src_height) + b * 2) * 4;
|
||||
int src_stride_argb = (b * 2 + Abs(src_width)) * 4;
|
||||
|
||||
align_buffer_page_end(src_argb, src_argb_plane_size);
|
||||
@ -184,9 +190,8 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
// Do full image, no clipping.
|
||||
double c_time = get_time();
|
||||
ARGBScale(src_argb + (src_stride_argb * b) + b * 4, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb_c + (dst_stride_argb * b) + b * 4, dst_stride_argb,
|
||||
dst_width, dst_height, f);
|
||||
src_width, src_height, dst_argb_c + (dst_stride_argb * b) + b * 4,
|
||||
dst_stride_argb, dst_width, dst_height, f);
|
||||
c_time = (get_time() - c_time);
|
||||
|
||||
// Do tiled image, clipping scale to a tile at a time.
|
||||
@ -200,8 +205,8 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||
|
||||
// Report performance of Full vs Tiled.
|
||||
printf("filter %d - %8d us Full - %8d us Tiled\n",
|
||||
f, static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
printf("filter %d - %8d us Full - %8d us Tiled\n", f,
|
||||
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
|
||||
// Compare full scaled image vs tiled image.
|
||||
int max_diff = 0;
|
||||
@ -226,32 +231,30 @@ static int ARGBClipTestFilter(int src_width, int src_height,
|
||||
#define DX(x, nom, denom) static_cast<int>((Abs(x) / nom) * nom)
|
||||
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
|
||||
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \
|
||||
int diff = ARGBTestFilter(SX(benchmark_width_, nom, denom), \
|
||||
SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), \
|
||||
DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \
|
||||
int diff = ARGBClipTestFilter(SX(benchmark_width_, nom, denom), \
|
||||
SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), \
|
||||
DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ARGBScaleDownBy##name##_##filter) { \
|
||||
int diff = ARGBTestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, ARGBScaleDownClipBy##name##_##filter) { \
|
||||
int diff = ARGBClipTestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
||||
#define TEST_FACTOR(name, nom, denom) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, 3)
|
||||
#define TEST_FACTOR(name, nom, denom) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, 3)
|
||||
|
||||
TEST_FACTOR(2, 1, 2)
|
||||
TEST_FACTOR(4, 1, 4)
|
||||
@ -265,39 +268,37 @@ TEST_FACTOR(3, 1, 3)
|
||||
#undef DX
|
||||
|
||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||
int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, \
|
||||
width, height, \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||
int diff = ARGBTestFilter(width, height, \
|
||||
Abs(benchmark_width_), Abs(benchmark_height_), \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \
|
||||
int diff = ARGBClipTestFilter(benchmark_width_, benchmark_height_, \
|
||||
width, height, \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \
|
||||
int diff = ARGBClipTestFilter(width, height, \
|
||||
Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||
int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||
int diff = ARGBTestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##ClipTo##width##x##height##_##filter) { \
|
||||
int diff = \
|
||||
ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##ClipFrom##width##x##height##_##filter) { \
|
||||
int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
/// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 3)
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 3)
|
||||
|
||||
TEST_SCALETO(ARGBScale, 1, 1)
|
||||
TEST_SCALETO(ARGBScale, 320, 240)
|
||||
@ -310,31 +311,33 @@ TEST_SCALETO(ARGBScale, 1280, 720)
|
||||
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleReference2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int YUVToARGBScaleReference2(const uint8* src_y,
|
||||
int src_stride_y,
|
||||
const uint8* src_u,
|
||||
int src_stride_u,
|
||||
const uint8* src_v,
|
||||
int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y,
|
||||
int clip_width, int clip_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
uint8* argb_buffer = static_cast<uint8*>(malloc(src_width * src_height * 4));
|
||||
int r;
|
||||
I420ToARGB(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4,
|
||||
src_width, src_height);
|
||||
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4, src_width, src_height);
|
||||
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4,
|
||||
src_width, src_height,
|
||||
dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height,
|
||||
filtering);
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
|
||||
clip_width, clip_height, filtering);
|
||||
free(argb_buffer);
|
||||
return r;
|
||||
}
|
||||
@ -360,13 +363,17 @@ static void FillRamp(uint8* buf, int width, int height, int v, int dx, int dy) {
|
||||
}
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int YUVToARGBTestFilter(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode f, int benchmark_iterations,
|
||||
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||
static int YUVToARGBTestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
int64 src_y_plane_size = Abs(src_width) * Abs(src_height);
|
||||
int64 src_uv_plane_size = ((Abs(src_width) + 1) / 2) *
|
||||
((Abs(src_height) + 1) / 2);
|
||||
int64 src_uv_plane_size =
|
||||
((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
|
||||
int src_stride_y = Abs(src_width);
|
||||
int src_stride_uv = (Abs(src_width) + 1) / 2;
|
||||
|
||||
@ -374,8 +381,8 @@ static int YUVToARGBTestFilter(int src_width, int src_height,
|
||||
align_buffer_page_end(src_u, src_uv_plane_size);
|
||||
align_buffer_page_end(src_v, src_uv_plane_size);
|
||||
|
||||
int64 dst_argb_plane_size = (dst_width) * (dst_height) * 4LL;
|
||||
int dst_stride_argb = (dst_width) * 4;
|
||||
int64 dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
|
||||
int dst_stride_argb = (dst_width)*4;
|
||||
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
|
||||
align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
|
||||
if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) {
|
||||
@ -390,28 +397,18 @@ static int YUVToARGBTestFilter(int src_width, int src_height,
|
||||
memset(dst_argb_c, 2, dst_argb_plane_size);
|
||||
memset(dst_argb_opt, 3, dst_argb_plane_size);
|
||||
|
||||
YUVToARGBScaleReference2(src_y, src_stride_y,
|
||||
src_u, src_stride_uv,
|
||||
src_v, src_stride_uv,
|
||||
libyuv::FOURCC_I420,
|
||||
src_width, src_height,
|
||||
dst_argb_c, dst_stride_argb,
|
||||
libyuv::FOURCC_I420,
|
||||
dst_width, dst_height,
|
||||
0, 0, dst_width, dst_height,
|
||||
f);
|
||||
YUVToARGBScaleReference2(src_y, src_stride_y, src_u, src_stride_uv, src_v,
|
||||
src_stride_uv, libyuv::FOURCC_I420, src_width,
|
||||
src_height, dst_argb_c, dst_stride_argb,
|
||||
libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
|
||||
dst_width, dst_height, f);
|
||||
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
YUVToARGBScaleClip(src_y, src_stride_y,
|
||||
src_u, src_stride_uv,
|
||||
src_v, src_stride_uv,
|
||||
libyuv::FOURCC_I420,
|
||||
src_width, src_height,
|
||||
dst_argb_opt, dst_stride_argb,
|
||||
libyuv::FOURCC_I420,
|
||||
dst_width, dst_height,
|
||||
0, 0, dst_width, dst_height,
|
||||
f);
|
||||
YUVToARGBScaleClip(src_y, src_stride_y, src_u, src_stride_uv, src_v,
|
||||
src_stride_uv, libyuv::FOURCC_I420, src_width,
|
||||
src_height, dst_argb_opt, dst_stride_argb,
|
||||
libyuv::FOURCC_I420, dst_width, dst_height, 0, 0,
|
||||
dst_width, dst_height, f);
|
||||
}
|
||||
int max_diff = 0;
|
||||
for (int i = 0; i < dst_height; ++i) {
|
||||
@ -419,9 +416,7 @@ static int YUVToARGBTestFilter(int src_width, int src_height,
|
||||
int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
|
||||
dst_argb_opt[(i * dst_stride_argb) + j]);
|
||||
if (abs_diff > max_diff) {
|
||||
printf("error %d at %d,%d c %d opt %d",
|
||||
abs_diff,
|
||||
j, i,
|
||||
printf("error %d at %d,%d c %d opt %d", abs_diff, j, i,
|
||||
dst_argb_c[(i * dst_stride_argb) + j],
|
||||
dst_argb_opt[(i * dst_stride_argb) + j]);
|
||||
EXPECT_LE(abs_diff, 40);
|
||||
@ -439,24 +434,19 @@ static int YUVToARGBTestFilter(int src_width, int src_height,
|
||||
}
|
||||
|
||||
TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
|
||||
int diff = YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
|
||||
benchmark_width_ * 3 / 2,
|
||||
benchmark_height_ * 3 / 2,
|
||||
libyuv::kFilterBilinear,
|
||||
benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
int diff = YUVToARGBTestFilter(
|
||||
benchmark_width_, benchmark_height_, benchmark_width_ * 3 / 2,
|
||||
benchmark_height_ * 3 / 2, libyuv::kFilterBilinear, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
EXPECT_LE(diff, 10);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
|
||||
int diff = YUVToARGBTestFilter(benchmark_width_ * 3 / 2,
|
||||
benchmark_height_ * 3 / 2,
|
||||
benchmark_width_, benchmark_height_,
|
||||
libyuv::kFilterBilinear,
|
||||
benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
int diff = YUVToARGBTestFilter(
|
||||
benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_,
|
||||
benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_);
|
||||
EXPECT_LE(diff, 10);
|
||||
}
|
||||
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
@ -11,9 +11,9 @@
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/scale.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
|
||||
#define STRINGIZE(line) #line
|
||||
#define FILELINESTR(file, line) file ":" STRINGIZE(line)
|
||||
@ -21,10 +21,14 @@
|
||||
namespace libyuv {
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int TestFilter(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode f, int benchmark_iterations,
|
||||
int disable_cpu_flags, int benchmark_cpu_info) {
|
||||
static int TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
@ -41,9 +45,8 @@ static int TestFilter(int src_width, int src_height,
|
||||
int src_stride_uv = b * 2 + src_width_uv;
|
||||
|
||||
align_buffer_page_end(src_y, src_y_plane_size)
|
||||
align_buffer_page_end(src_u, src_uv_plane_size)
|
||||
align_buffer_page_end(src_v, src_uv_plane_size)
|
||||
if (!src_y || !src_u || !src_v) {
|
||||
align_buffer_page_end(src_u, src_uv_plane_size) align_buffer_page_end(
|
||||
src_v, src_uv_plane_size) if (!src_y || !src_u || !src_v) {
|
||||
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
|
||||
return 0;
|
||||
}
|
||||
@ -61,13 +64,15 @@ static int TestFilter(int src_width, int src_height,
|
||||
int dst_stride_uv = b * 2 + dst_width_uv;
|
||||
|
||||
align_buffer_page_end(dst_y_c, dst_y_plane_size)
|
||||
align_buffer_page_end(dst_u_c, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_v_c, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_y_opt, dst_y_plane_size)
|
||||
align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_v_opt, dst_uv_plane_size)
|
||||
if (!dst_y_c || !dst_u_c || !dst_v_c ||
|
||||
!dst_y_opt|| !dst_u_opt|| !dst_v_opt) {
|
||||
align_buffer_page_end(dst_u_c, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_v_c, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_y_opt, dst_y_plane_size)
|
||||
align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
|
||||
align_buffer_page_end(
|
||||
dst_v_opt,
|
||||
dst_uv_plane_size) if (!dst_y_c || !dst_u_c ||
|
||||
!dst_v_c || !dst_y_opt ||
|
||||
!dst_u_opt || !dst_v_opt) {
|
||||
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
|
||||
return 0;
|
||||
}
|
||||
@ -76,12 +81,11 @@ static int TestFilter(int src_width, int src_height,
|
||||
double c_time = get_time();
|
||||
I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
|
||||
src_u + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_width, src_height,
|
||||
dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
|
||||
src_height, dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
|
||||
dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_width, dst_height, f);
|
||||
dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
|
||||
dst_height, f);
|
||||
c_time = (get_time() - c_time);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
@ -89,19 +93,16 @@ static int TestFilter(int src_width, int src_height,
|
||||
for (i = 0; i < benchmark_iterations; ++i) {
|
||||
I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
|
||||
src_u + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_width, src_height,
|
||||
dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
|
||||
src_height, dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
|
||||
dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_width, dst_height, f);
|
||||
dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
|
||||
dst_height, f);
|
||||
}
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||
// Report performance of C vs OPT
|
||||
printf("filter %d - %8d us C - %8d us OPT\n",
|
||||
f,
|
||||
static_cast<int>(c_time * 1e6),
|
||||
static_cast<int>(opt_time * 1e6));
|
||||
printf("filter %d - %8d us C - %8d us OPT\n", f,
|
||||
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
|
||||
// C version may be a little off from the optimized. Order of
|
||||
// operations may introduce rounding somewhere. So do a difference
|
||||
@ -133,25 +134,27 @@ static int TestFilter(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(dst_y_c)
|
||||
free_aligned_buffer_page_end(dst_u_c)
|
||||
free_aligned_buffer_page_end(dst_v_c)
|
||||
free_aligned_buffer_page_end(dst_y_opt)
|
||||
free_aligned_buffer_page_end(dst_u_opt)
|
||||
free_aligned_buffer_page_end(dst_v_opt)
|
||||
free_aligned_buffer_page_end(dst_y_c) free_aligned_buffer_page_end(dst_u_c)
|
||||
free_aligned_buffer_page_end(dst_v_c)
|
||||
free_aligned_buffer_page_end(dst_y_opt)
|
||||
free_aligned_buffer_page_end(dst_u_opt)
|
||||
free_aligned_buffer_page_end(dst_v_opt)
|
||||
|
||||
free_aligned_buffer_page_end(src_y)
|
||||
free_aligned_buffer_page_end(src_u)
|
||||
free_aligned_buffer_page_end(src_v)
|
||||
free_aligned_buffer_page_end(src_y)
|
||||
free_aligned_buffer_page_end(src_u)
|
||||
free_aligned_buffer_page_end(src_v)
|
||||
|
||||
return max_diff;
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
|
||||
// 0 = exact.
|
||||
static int TestFilter_16(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
FilterMode f, int benchmark_iterations) {
|
||||
static int TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
@ -161,20 +164,18 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
int src_width_uv = (Abs(src_width) + 1) >> 1;
|
||||
int src_height_uv = (Abs(src_height) + 1) >> 1;
|
||||
|
||||
int64 src_y_plane_size = (Abs(src_width) + b * 2) *
|
||||
(Abs(src_height) + b * 2);
|
||||
int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
|
||||
int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
|
||||
|
||||
int src_stride_y = b * 2 + Abs(src_width);
|
||||
int src_stride_uv = b * 2 + src_width_uv;
|
||||
|
||||
align_buffer_page_end(src_y, src_y_plane_size)
|
||||
align_buffer_page_end(src_u, src_uv_plane_size)
|
||||
align_buffer_page_end(src_v, src_uv_plane_size)
|
||||
align_buffer_page_end(src_y_16, src_y_plane_size * 2)
|
||||
align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
|
||||
align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
|
||||
uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
|
||||
align_buffer_page_end(src_y, src_y_plane_size) align_buffer_page_end(
|
||||
src_u, src_uv_plane_size) align_buffer_page_end(src_v, src_uv_plane_size)
|
||||
align_buffer_page_end(src_y_16, src_y_plane_size * 2)
|
||||
align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
|
||||
align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
|
||||
uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
|
||||
uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
|
||||
uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
|
||||
|
||||
@ -205,34 +206,33 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
int dst_stride_uv = b * 2 + dst_width_uv;
|
||||
|
||||
align_buffer_page_end(dst_y_8, dst_y_plane_size)
|
||||
align_buffer_page_end(dst_u_8, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_v_8, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
|
||||
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
|
||||
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
|
||||
align_buffer_page_end(dst_u_8, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_v_8, dst_uv_plane_size)
|
||||
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
|
||||
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
|
||||
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
|
||||
|
||||
uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
|
||||
uint16* p_dst_y_16 =
|
||||
reinterpret_cast<uint16*>(dst_y_16);
|
||||
uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
|
||||
uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
|
||||
|
||||
I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
|
||||
src_u + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_width, src_height,
|
||||
dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
|
||||
src_v + (src_stride_uv * b) + b, src_stride_uv, src_width,
|
||||
src_height, dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
|
||||
dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_width, dst_height, f);
|
||||
dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
|
||||
dst_height, f);
|
||||
|
||||
for (i = 0; i < benchmark_iterations; ++i) {
|
||||
I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
|
||||
p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
|
||||
p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
|
||||
src_width, src_height,
|
||||
p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
|
||||
p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv, src_width,
|
||||
src_height, p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
|
||||
p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
|
||||
dst_width, dst_height, f);
|
||||
p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv, dst_width,
|
||||
dst_height, f);
|
||||
}
|
||||
|
||||
// Expect an exact match
|
||||
@ -262,21 +262,20 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(dst_y_8)
|
||||
free_aligned_buffer_page_end(dst_u_8)
|
||||
free_aligned_buffer_page_end(dst_v_8)
|
||||
free_aligned_buffer_page_end(dst_y_16)
|
||||
free_aligned_buffer_page_end(dst_u_16)
|
||||
free_aligned_buffer_page_end(dst_v_16)
|
||||
free_aligned_buffer_page_end(dst_y_8) free_aligned_buffer_page_end(dst_u_8)
|
||||
free_aligned_buffer_page_end(dst_v_8)
|
||||
free_aligned_buffer_page_end(dst_y_16)
|
||||
free_aligned_buffer_page_end(dst_u_16)
|
||||
free_aligned_buffer_page_end(dst_v_16)
|
||||
|
||||
free_aligned_buffer_page_end(src_y)
|
||||
free_aligned_buffer_page_end(src_u)
|
||||
free_aligned_buffer_page_end(src_v)
|
||||
free_aligned_buffer_page_end(src_y_16)
|
||||
free_aligned_buffer_page_end(src_u_16)
|
||||
free_aligned_buffer_page_end(src_v_16)
|
||||
free_aligned_buffer_page_end(src_y)
|
||||
free_aligned_buffer_page_end(src_u)
|
||||
free_aligned_buffer_page_end(src_v)
|
||||
free_aligned_buffer_page_end(src_y_16)
|
||||
free_aligned_buffer_page_end(src_u_16)
|
||||
free_aligned_buffer_page_end(src_v_16)
|
||||
|
||||
return max_diff;
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// The following adjustments in dimensions ensure the scale factor will be
|
||||
@ -285,32 +284,30 @@ static int TestFilter_16(int src_width, int src_height,
|
||||
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
|
||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
|
||||
int diff = TestFilter(SX(benchmark_width_, nom, denom), \
|
||||
SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), \
|
||||
DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = TestFilter_16(SX(benchmark_width_, nom, denom), \
|
||||
SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), \
|
||||
DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
|
||||
int diff = TestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = TestFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
||||
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, boxdiff)
|
||||
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, boxdiff)
|
||||
|
||||
TEST_FACTOR(2, 1, 2, 0)
|
||||
TEST_FACTOR(4, 1, 4, 0)
|
||||
@ -323,42 +320,40 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
#undef SX
|
||||
#undef DX
|
||||
|
||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(benchmark_width_, benchmark_height_, \
|
||||
width, height, \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(width, height, \
|
||||
Abs(benchmark_width_), Abs(benchmark_height_), \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
DISABLED_##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(benchmark_width_, benchmark_height_, \
|
||||
width, height, \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
DISABLED_##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(width, height, \
|
||||
Abs(benchmark_width_), Abs(benchmark_height_), \
|
||||
kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
DISABLED_##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
DISABLED_##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Box, 0)
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 0) \
|
||||
TEST_SCALETO1(name, width, height, Box, 0)
|
||||
|
||||
TEST_SCALETO(Scale, 1, 1)
|
||||
TEST_SCALETO(Scale, 320, 240)
|
||||
|
||||
@ -25,18 +25,21 @@ unsigned int fastrand_seed = 0xfb;
|
||||
DEFINE_int32(libyuv_width, 0, "width of test image.");
|
||||
DEFINE_int32(libyuv_height, 0, "height of test image.");
|
||||
DEFINE_int32(libyuv_repeat, 0, "number of times to repeat test.");
|
||||
DEFINE_int32(libyuv_flags, 0,
|
||||
"cpu flags for reference code. 1 = C, -1 = SIMD");
|
||||
DEFINE_int32(libyuv_cpu_info, 0,
|
||||
DEFINE_int32(libyuv_flags, 0, "cpu flags for reference code. 1 = C, -1 = SIMD");
|
||||
DEFINE_int32(libyuv_cpu_info,
|
||||
0,
|
||||
"cpu flags for benchmark code. 1 = C, -1 = SIMD");
|
||||
|
||||
// For quicker unittests, default is 128 x 72. But when benchmarking,
|
||||
// default to 720p. Allow size to specify.
|
||||
// Set flags to -1 for benchmarking to avoid slower C code.
|
||||
|
||||
LibYUVConvertTest::LibYUVConvertTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVConvertTest::LibYUVConvertTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -76,19 +79,26 @@ LibYUVConvertTest::LibYUVConvertTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
LibYUVColorTest::LibYUVColorTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVColorTest::LibYUVColorTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -128,19 +138,26 @@ LibYUVColorTest::LibYUVColorTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
LibYUVScaleTest::LibYUVScaleTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVScaleTest::LibYUVScaleTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -180,19 +197,26 @@ LibYUVScaleTest::LibYUVScaleTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
LibYUVRotateTest::LibYUVRotateTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVRotateTest::LibYUVRotateTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -232,19 +256,26 @@ LibYUVRotateTest::LibYUVRotateTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
LibYUVPlanarTest::LibYUVPlanarTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVPlanarTest::LibYUVPlanarTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -284,19 +315,26 @@ LibYUVPlanarTest::LibYUVPlanarTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
LibYUVBaseTest::LibYUVBaseTest() :
|
||||
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(128),
|
||||
benchmark_height_(72), disable_cpu_flags_(1), benchmark_cpu_info_(-1) {
|
||||
LibYUVBaseTest::LibYUVBaseTest()
|
||||
: benchmark_iterations_(BENCHMARK_ITERATIONS),
|
||||
benchmark_width_(128),
|
||||
benchmark_height_(72),
|
||||
disable_cpu_flags_(1),
|
||||
benchmark_cpu_info_(-1) {
|
||||
const char* repeat = getenv("LIBYUV_REPEAT");
|
||||
if (repeat) {
|
||||
benchmark_iterations_ = atoi(repeat); // NOLINT
|
||||
@ -336,14 +374,18 @@ LibYUVBaseTest::LibYUVBaseTest() :
|
||||
if (FLAGS_libyuv_cpu_info) {
|
||||
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
|
||||
}
|
||||
benchmark_pixels_div256_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 255.0) / 256.0);
|
||||
benchmark_pixels_div1280_ = static_cast<int>((
|
||||
static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) + 1279.0) / 1280.0);
|
||||
benchmark_pixels_div256_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
255.0) /
|
||||
256.0);
|
||||
benchmark_pixels_div1280_ =
|
||||
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
|
||||
static_cast<double>(Abs(benchmark_height_)) *
|
||||
static_cast<double>(benchmark_iterations_) +
|
||||
1279.0) /
|
||||
1280.0);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
@ -14,8 +14,8 @@
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
@ -54,8 +54,10 @@ static __inline int Abs(int v) {
|
||||
static const int kMaxWidth = 32768;
|
||||
static const int kMaxHeight = 32768;
|
||||
|
||||
static inline bool SizeValid(int src_width, int src_height,
|
||||
int dst_width, int dst_height) {
|
||||
static inline bool SizeValid(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (src_width > kMaxWidth || src_height > kMaxHeight ||
|
||||
dst_width > kMaxWidth || dst_height > kMaxHeight) {
|
||||
printf("Warning - size too large to test. Skipping\n");
|
||||
@ -64,15 +66,16 @@ static inline bool SizeValid(int src_width, int src_height,
|
||||
return true;
|
||||
}
|
||||
|
||||
#define align_buffer_page_end(var, size) \
|
||||
uint8* var; \
|
||||
uint8* var##_mem; \
|
||||
var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
|
||||
var = (uint8*)((intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - \
|
||||
(size)) & ~63);
|
||||
#define align_buffer_page_end(var, size) \
|
||||
uint8* var; \
|
||||
uint8* var##_mem; \
|
||||
var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
|
||||
var = (uint8*)((intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - \
|
||||
(size)) & \
|
||||
~63);
|
||||
|
||||
#define free_aligned_buffer_page_end(var) \
|
||||
free(var##_mem); \
|
||||
free(var##_mem); \
|
||||
var = 0;
|
||||
|
||||
#ifdef WIN32
|
||||
@ -122,78 +125,78 @@ class LibYUVColorTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVColorTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
class LibYUVConvertTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVConvertTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
class LibYUVScaleTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVScaleTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
class LibYUVRotateTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVRotateTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
class LibYUVPlanarTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVPlanarTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
class LibYUVBaseTest : public ::testing::Test {
|
||||
protected:
|
||||
LibYUVBaseTest();
|
||||
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_iterations_; // Default 1. Use 1000 for benchmarking.
|
||||
int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA.
|
||||
int benchmark_height_; // Default 720. Use 360 for benchmarking VGA.
|
||||
int benchmark_pixels_div256_; // Total pixels to benchmark / 256.
|
||||
int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
int disable_cpu_flags_; // Default 1. Use -1 for benchmarking.
|
||||
int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.
|
||||
};
|
||||
|
||||
#endif // UNIT_TEST_UNIT_TEST_H_ NOLINT
|
||||
|
||||
@ -11,26 +11,23 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libyuv/video_common.h"
|
||||
#include "../unit_test/unit_test.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
// Tests FourCC codes in video common, which are used for ConvertToI420().
|
||||
|
||||
static bool TestValidChar(uint32 onecc) {
|
||||
if ((onecc >= '0' && onecc <= '9') ||
|
||||
(onecc >= 'A' && onecc <= 'Z') ||
|
||||
(onecc >= 'a' && onecc <= 'z') ||
|
||||
(onecc == ' ') || (onecc == 0xff)) {
|
||||
if ((onecc >= '0' && onecc <= '9') || (onecc >= 'A' && onecc <= 'Z') ||
|
||||
(onecc >= 'a' && onecc <= 'z') || (onecc == ' ') || (onecc == 0xff)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool TestValidFourCC(uint32 fourcc, int bpp) {
|
||||
if (!TestValidChar(fourcc & 0xff) ||
|
||||
!TestValidChar((fourcc >> 8) & 0xff) ||
|
||||
if (!TestValidChar(fourcc & 0xff) || !TestValidChar((fourcc >> 8) & 0xff) ||
|
||||
!TestValidChar((fourcc >> 16) & 0xff) ||
|
||||
!TestValidChar((fourcc >> 24) & 0xff)) {
|
||||
return false;
|
||||
@ -52,10 +49,10 @@ TEST_F(LibYUVBaseTest, TestCanonicalFourCC) {
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565));
|
||||
EXPECT_EQ(static_cast<uint32>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551));
|
||||
@ -77,7 +74,7 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO));
|
||||
@ -100,7 +97,7 @@ TEST_F(LibYUVBaseTest, TestFourCC) {
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY));
|
||||
EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY));
|
||||
}
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
@ -39,10 +39,12 @@ int main(int argc, char** argv) {
|
||||
int amt2 = 0;
|
||||
do {
|
||||
amt1 = static_cast<int>(fread(buf1, 1, kBlockSize, fin1));
|
||||
if (amt1 > 0) hash1 = libyuv::HashDjb2(buf1, amt1, hash1);
|
||||
if (amt1 > 0)
|
||||
hash1 = libyuv::HashDjb2(buf1, amt1, hash1);
|
||||
if (fin2) {
|
||||
amt2 = static_cast<int>(fread(buf2, 1, kBlockSize, fin2));
|
||||
if (amt2 > 0) hash2 = libyuv::HashDjb2(buf2, amt2, hash2);
|
||||
if (amt2 > 0)
|
||||
hash2 = libyuv::HashDjb2(buf2, amt2, hash2);
|
||||
int amt_min = (amt1 < amt2) ? amt1 : amt2;
|
||||
size_min += amt_min;
|
||||
sum_square_err += libyuv::ComputeSumSquareError(buf1, buf2, amt_min);
|
||||
@ -52,8 +54,8 @@ int main(int argc, char** argv) {
|
||||
printf("hash1 %x", hash1);
|
||||
if (fin2) {
|
||||
printf(", hash2 %x", hash2);
|
||||
double mse = static_cast<double>(sum_square_err) /
|
||||
static_cast<double>(size_min);
|
||||
double mse =
|
||||
static_cast<double>(sum_square_err) / static_cast<double>(size_min);
|
||||
printf(", mse %.2f", mse);
|
||||
double psnr = libyuv::SumSquareErrorToPsnr(sum_square_err, size_min);
|
||||
printf(", psnr %.2f\n", psnr);
|
||||
|
||||
141
util/convert.cc
141
util/convert.cc
@ -29,13 +29,13 @@ bool verbose = false;
|
||||
bool attenuate = false;
|
||||
bool unattenuate = false;
|
||||
int image_width = 0, image_height = 0; // original width and height
|
||||
int dst_width = 0, dst_height = 0; // new width and height
|
||||
int dst_width = 0, dst_height = 0; // new width and height
|
||||
int fileindex_org = 0; // argv argument contains the original file name.
|
||||
int fileindex_rec = 0; // argv argument contains the reconstructed file name.
|
||||
int num_rec = 0; // Number of reconstructed images.
|
||||
int num_skip_org = 0; // Number of frames to skip in original.
|
||||
int num_frames = 0; // Number of frames to convert.
|
||||
int filter = 1; // Bilinear filter for scaling.
|
||||
int num_rec = 0; // Number of reconstructed images.
|
||||
int num_skip_org = 0; // Number of frames to skip in original.
|
||||
int num_frames = 0; // Number of frames to convert.
|
||||
int filter = 1; // Bilinear filter for scaling.
|
||||
|
||||
static __inline uint32 Abs(int32 v) {
|
||||
return v >= 0 ? v : -v;
|
||||
@ -48,8 +48,8 @@ bool ExtractResolutionFromFilename(const char* name,
|
||||
// Isolate the .width_height. section of the filename by searching for a
|
||||
// dot or underscore followed by a digit.
|
||||
for (int i = 0; name[i]; ++i) {
|
||||
if ((name[i] == '.' || name[i] == '_') &&
|
||||
name[i + 1] >= '0' && name[i + 1] <= '9') {
|
||||
if ((name[i] == '.' || name[i] == '_') && name[i + 1] >= '0' &&
|
||||
name[i + 1] <= '9') {
|
||||
int n = sscanf(name + i + 1, "%dx%d", width_ptr, height_ptr); // NOLINT
|
||||
if (2 == n) {
|
||||
return true;
|
||||
@ -59,13 +59,14 @@ bool ExtractResolutionFromFilename(const char* name,
|
||||
return false;
|
||||
}
|
||||
|
||||
void PrintHelp(const char * program) {
|
||||
void PrintHelp(const char* program) {
|
||||
printf("%s [-options] src_argb.raw dst_yuv.raw\n", program);
|
||||
printf(" -s <width> <height> .... specify source resolution. "
|
||||
"Optional if name contains\n"
|
||||
" resolution (ie. "
|
||||
"name.1920x800_24Hz_P420.yuv)\n"
|
||||
" Negative value mirrors.\n");
|
||||
printf(
|
||||
" -s <width> <height> .... specify source resolution. "
|
||||
"Optional if name contains\n"
|
||||
" resolution (ie. "
|
||||
"name.1920x800_24Hz_P420.yuv)\n"
|
||||
" Negative value mirrors.\n");
|
||||
printf(" -d <width> <height> .... specify destination resolution.\n");
|
||||
printf(" -f <filter> ............ 0 = point, 1 = bilinear (default).\n");
|
||||
printf(" -skip <src_argb> ....... Number of frame to skip of src_argb\n");
|
||||
@ -78,7 +79,8 @@ void PrintHelp(const char * program) {
|
||||
}
|
||||
|
||||
void ParseOptions(int argc, const char* argv[]) {
|
||||
if (argc <= 1) PrintHelp(argv[0]);
|
||||
if (argc <= 1)
|
||||
PrintHelp(argv[0]);
|
||||
for (int c = 1; c < argc; ++c) {
|
||||
if (!strcmp(argv[c], "-v")) {
|
||||
verbose = true;
|
||||
@ -89,17 +91,17 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
PrintHelp(argv[0]);
|
||||
} else if (!strcmp(argv[c], "-s") && c + 2 < argc) {
|
||||
image_width = atoi(argv[++c]); // NOLINT
|
||||
image_height = atoi(argv[++c]); // NOLINT
|
||||
image_width = atoi(argv[++c]); // NOLINT
|
||||
image_height = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-d") && c + 2 < argc) {
|
||||
dst_width = atoi(argv[++c]); // NOLINT
|
||||
dst_height = atoi(argv[++c]); // NOLINT
|
||||
dst_width = atoi(argv[++c]); // NOLINT
|
||||
dst_height = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-skip") && c + 1 < argc) {
|
||||
num_skip_org = atoi(argv[++c]); // NOLINT
|
||||
num_skip_org = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-frames") && c + 1 < argc) {
|
||||
num_frames = atoi(argv[++c]); // NOLINT
|
||||
num_frames = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-f") && c + 1 < argc) {
|
||||
filter = atoi(argv[++c]); // NOLINT
|
||||
filter = atoi(argv[++c]); // NOLINT
|
||||
} else if (argv[c][0] == '-') {
|
||||
fprintf(stderr, "Unknown option. %s\n", argv[c]);
|
||||
} else if (fileindex_org == 0) {
|
||||
@ -127,11 +129,9 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
int org_width, org_height;
|
||||
int rec_width, rec_height;
|
||||
bool org_res_avail = ExtractResolutionFromFilename(argv[fileindex_org],
|
||||
&org_width,
|
||||
&org_height);
|
||||
&org_width, &org_height);
|
||||
bool rec_res_avail = ExtractResolutionFromFilename(argv[fileindex_rec],
|
||||
&rec_width,
|
||||
&rec_height);
|
||||
&rec_width, &rec_height);
|
||||
if (image_width == 0 || image_height == 0) {
|
||||
if (org_res_avail) {
|
||||
image_width = org_width;
|
||||
@ -158,10 +158,14 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
static const int kTileX = 32;
|
||||
static const int kTileY = 32;
|
||||
|
||||
static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
static int TileARGBScale(const uint8* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::FilterMode filtering) {
|
||||
for (int y = 0; y < dst_height; y += kTileY) {
|
||||
for (int x = 0; x < dst_width; x += kTileX) {
|
||||
@ -173,11 +177,10 @@ static int TileARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
if (y + clip_height > dst_height) {
|
||||
clip_height = dst_height - y;
|
||||
}
|
||||
int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb,
|
||||
src_width, src_height,
|
||||
dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height,
|
||||
x, y, clip_width, clip_height, filtering);
|
||||
int r = libyuv::ARGBScaleClip(src_argb, src_stride_argb, src_width,
|
||||
src_height, dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height, x, y, clip_width,
|
||||
clip_height, filtering);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
@ -197,8 +200,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
// Open all files to convert to
|
||||
FILE** file_rec = new FILE* [num_rec];
|
||||
memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT
|
||||
FILE** file_rec = new FILE*[num_rec];
|
||||
memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT
|
||||
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
|
||||
file_rec[cur_rec] = fopen(argv[fileindex_rec + cur_rec], "wb");
|
||||
if (file_rec[cur_rec] == NULL) {
|
||||
@ -222,8 +225,8 @@ int main(int argc, const char* argv[]) {
|
||||
// Input is YUV
|
||||
if (org_is_yuv) {
|
||||
const int y_size = Abs(image_width) * Abs(image_height);
|
||||
const int uv_size = ((Abs(image_width) + 1) / 2) *
|
||||
((Abs(image_height) + 1) / 2);
|
||||
const int uv_size =
|
||||
((Abs(image_width) + 1) / 2) * ((Abs(image_height) + 1) / 2);
|
||||
org_size = y_size + 2 * uv_size; // YUV original.
|
||||
}
|
||||
|
||||
@ -233,8 +236,8 @@ int main(int argc, const char* argv[]) {
|
||||
const size_t total_size = y_size + 2 * uv_size;
|
||||
#if defined(_MSC_VER)
|
||||
_fseeki64(file_org,
|
||||
static_cast<__int64>(num_skip_org) *
|
||||
static_cast<__int64>(org_size), SEEK_SET);
|
||||
static_cast<__int64>(num_skip_org) * static_cast<__int64>(org_size),
|
||||
SEEK_SET);
|
||||
#else
|
||||
fseek(file_org, num_skip_org * total_size, SEEK_SET);
|
||||
#endif
|
||||
@ -256,18 +259,18 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
printf("Size: %dx%d to %dx%d\n", image_width, image_height,
|
||||
dst_width, dst_height);
|
||||
printf("Size: %dx%d to %dx%d\n", image_width, image_height, dst_width,
|
||||
dst_height);
|
||||
}
|
||||
|
||||
int number_of_frames;
|
||||
for (number_of_frames = 0; ; ++number_of_frames) {
|
||||
for (number_of_frames = 0;; ++number_of_frames) {
|
||||
if (num_frames && number_of_frames >= num_frames)
|
||||
break;
|
||||
|
||||
// Load original YUV or ARGB frame.
|
||||
size_t bytes_org = fread(ch_org, sizeof(uint8),
|
||||
static_cast<size_t>(org_size), file_org);
|
||||
size_t bytes_org =
|
||||
fread(ch_org, sizeof(uint8), static_cast<size_t>(org_size), file_org);
|
||||
if (bytes_org < static_cast<size_t>(org_size))
|
||||
break;
|
||||
|
||||
@ -290,22 +293,17 @@ int main(int argc, const char* argv[]) {
|
||||
int half_src_height = (src_height + 1) / 2;
|
||||
int half_dst_width = (dst_width + 1) / 2;
|
||||
int half_dst_height = (dst_height + 1) / 2;
|
||||
I420Scale(ch_org, src_width,
|
||||
ch_org + src_width * src_height, half_src_width,
|
||||
ch_org + src_width * src_height +
|
||||
half_src_width * half_src_height, half_src_width,
|
||||
image_width, image_height,
|
||||
ch_rec, dst_width,
|
||||
ch_rec + dst_width * dst_height, half_dst_width,
|
||||
ch_rec + dst_width * dst_height +
|
||||
half_dst_width * half_dst_height, half_dst_width,
|
||||
dst_width, dst_height,
|
||||
static_cast<libyuv::FilterMode>(filter));
|
||||
I420Scale(
|
||||
ch_org, src_width, ch_org + src_width * src_height, half_src_width,
|
||||
ch_org + src_width * src_height + half_src_width * half_src_height,
|
||||
half_src_width, image_width, image_height, ch_rec, dst_width,
|
||||
ch_rec + dst_width * dst_height, half_dst_width,
|
||||
ch_rec + dst_width * dst_height + half_dst_width * half_dst_height,
|
||||
half_dst_width, dst_width, dst_height,
|
||||
static_cast<libyuv::FilterMode>(filter));
|
||||
} else {
|
||||
TileARGBScale(ch_org, Abs(image_width) * 4,
|
||||
image_width, image_height,
|
||||
ch_dst, dst_width * 4,
|
||||
dst_width, dst_height,
|
||||
TileARGBScale(ch_org, Abs(image_width) * 4, image_width, image_height,
|
||||
ch_dst, dst_width * 4, dst_width, dst_height,
|
||||
static_cast<libyuv::FilterMode>(filter));
|
||||
}
|
||||
bool rec_is_yuv = strstr(argv[fileindex_rec + cur_rec], "_P420.") != NULL;
|
||||
@ -321,25 +319,24 @@ int main(int argc, const char* argv[]) {
|
||||
if (!org_is_yuv && rec_is_yuv) {
|
||||
int half_width = (dst_width + 1) / 2;
|
||||
int half_height = (dst_height + 1) / 2;
|
||||
libyuv::ARGBToI420(ch_dst, dst_width * 4,
|
||||
ch_rec, dst_width,
|
||||
ch_rec + dst_width * dst_height, half_width,
|
||||
ch_rec + dst_width * dst_height +
|
||||
half_width * half_height, half_width,
|
||||
dst_width, dst_height);
|
||||
libyuv::ARGBToI420(
|
||||
ch_dst, dst_width * 4, ch_rec, dst_width,
|
||||
ch_rec + dst_width * dst_height, half_width,
|
||||
ch_rec + dst_width * dst_height + half_width * half_height,
|
||||
half_width, dst_width, dst_height);
|
||||
}
|
||||
|
||||
// Output YUV or ARGB frame.
|
||||
if (rec_is_yuv) {
|
||||
size_t bytes_rec = fwrite(ch_rec, sizeof(uint8),
|
||||
static_cast<size_t>(total_size),
|
||||
file_rec[cur_rec]);
|
||||
size_t bytes_rec =
|
||||
fwrite(ch_rec, sizeof(uint8), static_cast<size_t>(total_size),
|
||||
file_rec[cur_rec]);
|
||||
if (bytes_rec < static_cast<size_t>(total_size))
|
||||
break;
|
||||
} else {
|
||||
size_t bytes_rec = fwrite(ch_dst, sizeof(uint8),
|
||||
static_cast<size_t>(dst_size),
|
||||
file_rec[cur_rec]);
|
||||
size_t bytes_rec =
|
||||
fwrite(ch_dst, sizeof(uint8), static_cast<size_t>(dst_size),
|
||||
file_rec[cur_rec]);
|
||||
if (bytes_rec < static_cast<size_t>(dst_size))
|
||||
break;
|
||||
}
|
||||
|
||||
232
util/psnr.cc
232
util/psnr.cc
@ -27,7 +27,7 @@ typedef unsigned __int64 uint64;
|
||||
#else // COMPILER_MSVC
|
||||
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long uint64; // NOLINT
|
||||
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long long uint64; // NOLINT
|
||||
#endif // __LP64__
|
||||
#endif // _MSC_VER
|
||||
@ -39,85 +39,81 @@ typedef unsigned long long uint64; // NOLINT
|
||||
!defined(__aarch64__)
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
static uint32 SumSquareError_NEON(const uint8* src_a,
|
||||
const uint8* src_b, int count) {
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
"vmov.u8 q7, #0 \n"
|
||||
"vmov.u8 q9, #0 \n"
|
||||
"vmov.u8 q8, #0 \n"
|
||||
"vmov.u8 q10, #0 \n"
|
||||
asm volatile(
|
||||
"vmov.u8 q7, #0 \n"
|
||||
"vmov.u8 q9, #0 \n"
|
||||
"vmov.u8 q8, #0 \n"
|
||||
"vmov.u8 q10, #0 \n"
|
||||
|
||||
"1: \n"
|
||||
"vld1.u8 {q0}, [%0]! \n"
|
||||
"vld1.u8 {q1}, [%1]! \n"
|
||||
"vsubl.u8 q2, d0, d2 \n"
|
||||
"vsubl.u8 q3, d1, d3 \n"
|
||||
"vmlal.s16 q7, d4, d4 \n"
|
||||
"vmlal.s16 q8, d6, d6 \n"
|
||||
"vmlal.s16 q8, d5, d5 \n"
|
||||
"vmlal.s16 q10, d7, d7 \n"
|
||||
"subs %2, %2, #16 \n"
|
||||
"bhi 1b \n"
|
||||
"1: \n"
|
||||
"vld1.u8 {q0}, [%0]! \n"
|
||||
"vld1.u8 {q1}, [%1]! \n"
|
||||
"vsubl.u8 q2, d0, d2 \n"
|
||||
"vsubl.u8 q3, d1, d3 \n"
|
||||
"vmlal.s16 q7, d4, d4 \n"
|
||||
"vmlal.s16 q8, d6, d6 \n"
|
||||
"vmlal.s16 q8, d5, d5 \n"
|
||||
"vmlal.s16 q10, d7, d7 \n"
|
||||
"subs %2, %2, #16 \n"
|
||||
"bhi 1b \n"
|
||||
|
||||
"vadd.u32 q7, q7, q8 \n"
|
||||
"vadd.u32 q9, q9, q10 \n"
|
||||
"vadd.u32 q10, q7, q9 \n"
|
||||
"vpaddl.u32 q1, q10 \n"
|
||||
"vadd.u64 d0, d2, d3 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(sse)
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
|
||||
"vadd.u32 q7, q7, q8 \n"
|
||||
"vadd.u32 q9, q9, q10 \n"
|
||||
"vadd.u32 q10, q7, q9 \n"
|
||||
"vpaddl.u32 q1, q10 \n"
|
||||
"vadd.u64 d0, d2, d3 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
|
||||
return sse;
|
||||
}
|
||||
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
static uint32 SumSquareError_NEON(const uint8* src_a,
|
||||
const uint8* src_b, int count) {
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
"eor v16.16b, v16.16b, v16.16b \n"
|
||||
"eor v18.16b, v18.16b, v18.16b \n"
|
||||
"eor v17.16b, v17.16b, v17.16b \n"
|
||||
"eor v19.16b, v19.16b, v19.16b \n"
|
||||
asm volatile(
|
||||
"eor v16.16b, v16.16b, v16.16b \n"
|
||||
"eor v18.16b, v18.16b, v18.16b \n"
|
||||
"eor v17.16b, v17.16b, v17.16b \n"
|
||||
"eor v19.16b, v19.16b, v19.16b \n"
|
||||
|
||||
"1: \n"
|
||||
"ld1 {v0.16b}, [%0], #16 \n"
|
||||
"ld1 {v1.16b}, [%1], #16 \n"
|
||||
"subs %w2, %w2, #16 \n"
|
||||
"usubl v2.8h, v0.8b, v1.8b \n"
|
||||
"usubl2 v3.8h, v0.16b, v1.16b \n"
|
||||
"smlal v16.4s, v2.4h, v2.4h \n"
|
||||
"smlal v17.4s, v3.4h, v3.4h \n"
|
||||
"smlal2 v18.4s, v2.8h, v2.8h \n"
|
||||
"smlal2 v19.4s, v3.8h, v3.8h \n"
|
||||
"b.gt 1b \n"
|
||||
"1: \n"
|
||||
"ld1 {v0.16b}, [%0], #16 \n"
|
||||
"ld1 {v1.16b}, [%1], #16 \n"
|
||||
"subs %w2, %w2, #16 \n"
|
||||
"usubl v2.8h, v0.8b, v1.8b \n"
|
||||
"usubl2 v3.8h, v0.16b, v1.16b \n"
|
||||
"smlal v16.4s, v2.4h, v2.4h \n"
|
||||
"smlal v17.4s, v3.4h, v3.4h \n"
|
||||
"smlal2 v18.4s, v2.8h, v2.8h \n"
|
||||
"smlal2 v19.4s, v3.8h, v3.8h \n"
|
||||
"b.gt 1b \n"
|
||||
|
||||
"add v16.4s, v16.4s, v17.4s \n"
|
||||
"add v18.4s, v18.4s, v19.4s \n"
|
||||
"add v19.4s, v16.4s, v18.4s \n"
|
||||
"addv s0, v19.4s \n"
|
||||
"fmov %w3, s0 \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(sse)
|
||||
:
|
||||
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
|
||||
"add v16.4s, v16.4s, v17.4s \n"
|
||||
"add v18.4s, v18.4s, v19.4s \n"
|
||||
"add v19.4s, v16.4s, v18.4s \n"
|
||||
"addv s0, v19.4s \n"
|
||||
"fmov %w3, s0 \n"
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
|
||||
:
|
||||
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
|
||||
return sse;
|
||||
}
|
||||
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
#define HAS_SUMSQUAREERROR_SSE2
|
||||
__declspec(naked)
|
||||
static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
|
||||
const uint8* /*src_b*/, int /*count*/) {
|
||||
__declspec(naked) static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
|
||||
const uint8* /*src_b*/,
|
||||
int /*count*/) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm5, xmm5
|
||||
sub edx, eax
|
||||
@ -151,47 +147,49 @@ static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
|
||||
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||
#define HAS_SUMSQUAREERROR_SSE2
|
||||
static uint32 SumSquareError_SSE2(const uint8* src_a,
|
||||
const uint8* src_b, int count) {
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
uint32 sse;
|
||||
asm volatile ( // NOLINT
|
||||
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
"sub %0,%1 \n"
|
||||
asm volatile( // NOLINT
|
||||
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
"sub %0,%1 \n"
|
||||
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"movdqu (%0,%1,1),%%xmm2 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm2,%%xmm1 \n"
|
||||
"movdqu %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"ja 1b \n"
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"movdqu (%0,%1,1),%%xmm2 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm2,%%xmm1 \n"
|
||||
"movdqu %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"ja 1b \n"
|
||||
|
||||
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=g"(sse) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=g"(sse) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
,
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
); // NOLINT
|
||||
); // NOLINT
|
||||
return sse;
|
||||
}
|
||||
#endif // LIBYUV_DISABLE_X86 etc
|
||||
@ -199,20 +197,22 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
|
||||
#if defined(HAS_SUMSQUAREERROR_SSE2)
|
||||
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
|
||||
static __inline void __cpuid(int cpu_info[4], int info_type) {
|
||||
asm volatile ( // NOLINT
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
|
||||
: "a"(info_type));
|
||||
asm volatile( // NOLINT
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
|
||||
"=d"(cpu_info[3])
|
||||
: "a"(info_type));
|
||||
}
|
||||
// For gcc/clang but not clangcl.
|
||||
#elif (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)
|
||||
#elif !defined(_MSC_VER) && (defined(__i386__) || defined(__x86_64__))
|
||||
static __inline void __cpuid(int cpu_info[4], int info_type) {
|
||||
asm volatile ( // NOLINT
|
||||
"cpuid \n"
|
||||
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
|
||||
: "a"(info_type));
|
||||
asm volatile( // NOLINT
|
||||
"cpuid \n"
|
||||
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
|
||||
"=d"(cpu_info[3])
|
||||
: "a"(info_type));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -229,7 +229,8 @@ static int CpuHasSSE2() {
|
||||
#endif // HAS_SUMSQUAREERROR_SSE2
|
||||
|
||||
static uint32 SumSquareError_C(const uint8* src_a,
|
||||
const uint8* src_b, int count) {
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
uint32 sse = 0u;
|
||||
for (int x = 0; x < count; ++x) {
|
||||
int diff = src_a[x] - src_b[x];
|
||||
@ -239,9 +240,10 @@ static uint32 SumSquareError_C(const uint8* src_a,
|
||||
}
|
||||
|
||||
double ComputeSumSquareError(const uint8* src_a,
|
||||
const uint8* src_b, int count) {
|
||||
uint32 (*SumSquareError)(const uint8* src_a,
|
||||
const uint8* src_b, int count) = SumSquareError_C;
|
||||
const uint8* src_b,
|
||||
int count) {
|
||||
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
||||
SumSquareError_C;
|
||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||
SumSquareError = SumSquareError_NEON;
|
||||
#endif
|
||||
@ -253,7 +255,7 @@ double ComputeSumSquareError(const uint8* src_a,
|
||||
const int kBlockSize = 1 << 15;
|
||||
uint64 sse = 0;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: sse)
|
||||
#pragma omp parallel for reduction(+ : sse)
|
||||
#endif
|
||||
for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
|
||||
|
||||
@ -71,8 +71,8 @@ bool ExtractResolutionFromFilename(const char* name,
|
||||
// Isolate the .width_height. section of the filename by searching for a
|
||||
// dot or underscore followed by a digit.
|
||||
for (int i = 0; name[i]; ++i) {
|
||||
if ((name[i] == '.' || name[i] == '_') &&
|
||||
name[i + 1] >= '0' && name[i + 1] <= '9') {
|
||||
if ((name[i] == '.' || name[i] == '_') && name[i + 1] >= '0' &&
|
||||
name[i + 1] <= '9') {
|
||||
int n = sscanf(name + i + 1, "%dx%d", width_ptr, height_ptr); // NOLINT
|
||||
if (2 == n) {
|
||||
return true;
|
||||
@ -88,7 +88,7 @@ bool ExtractResolutionFromFilename(const char* name,
|
||||
return false;
|
||||
}
|
||||
fseek(file_org, 0, SEEK_END);
|
||||
size_t total_size = ftell(file_org);
|
||||
size_t total_size = ftell(file_org);
|
||||
fseek(file_org, 0, SEEK_SET);
|
||||
uint8* const ch_org = new uint8[total_size];
|
||||
memset(ch_org, 0, total_size);
|
||||
@ -109,8 +109,10 @@ bool ExtractResolutionFromFilename(const char* name,
|
||||
// This can be useful when comparing codecs that are inconsistent about Y
|
||||
uint8 ScaleY(uint8 y) {
|
||||
int ny = (y - 16) * 256 / 224;
|
||||
if (ny < 0) ny = 0;
|
||||
if (ny > 255) ny = 255;
|
||||
if (ny < 0)
|
||||
ny = 0;
|
||||
if (ny > 255)
|
||||
ny = 255;
|
||||
return static_cast<uint8>(ny);
|
||||
}
|
||||
|
||||
@ -119,16 +121,18 @@ double GetMSE(double sse, double size) {
|
||||
return sse / size;
|
||||
}
|
||||
|
||||
void PrintHelp(const char * program) {
|
||||
void PrintHelp(const char* program) {
|
||||
printf("%s [-options] org_seq rec_seq [rec_seq2.. etc]\n", program);
|
||||
#ifdef HAVE_JPEG
|
||||
printf("jpeg or raw YUV 420 supported.\n");
|
||||
#endif
|
||||
printf("options:\n");
|
||||
printf(" -s <width> <height> .... specify YUV size, mandatory if none of the "
|
||||
"sequences have the\n");
|
||||
printf(" resolution embedded in their filename (ie. "
|
||||
"name.1920x800_24Hz_P420.yuv)\n");
|
||||
printf(
|
||||
" -s <width> <height> .... specify YUV size, mandatory if none of the "
|
||||
"sequences have the\n");
|
||||
printf(
|
||||
" resolution embedded in their filename (ie. "
|
||||
"name.1920x800_24Hz_P420.yuv)\n");
|
||||
printf(" -psnr .................. compute PSNR (default)\n");
|
||||
printf(" -ssim .................. compute SSIM\n");
|
||||
printf(" -mse ................... compute MSE\n");
|
||||
@ -146,7 +150,8 @@ void PrintHelp(const char * program) {
|
||||
}
|
||||
|
||||
void ParseOptions(int argc, const char* argv[]) {
|
||||
if (argc <= 1) PrintHelp(argv[0]);
|
||||
if (argc <= 1)
|
||||
PrintHelp(argv[0]);
|
||||
for (int c = 1; c < argc; ++c) {
|
||||
if (!strcmp(argv[c], "-v")) {
|
||||
verbose = true;
|
||||
@ -168,16 +173,16 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
} else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) {
|
||||
PrintHelp(argv[0]);
|
||||
} else if (!strcmp(argv[c], "-s") && c + 2 < argc) {
|
||||
image_width = atoi(argv[++c]); // NOLINT
|
||||
image_height = atoi(argv[++c]); // NOLINT
|
||||
image_width = atoi(argv[++c]); // NOLINT
|
||||
image_height = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-skip") && c + 2 < argc) {
|
||||
num_skip_org = atoi(argv[++c]); // NOLINT
|
||||
num_skip_rec = atoi(argv[++c]); // NOLINT
|
||||
num_skip_org = atoi(argv[++c]); // NOLINT
|
||||
num_skip_rec = atoi(argv[++c]); // NOLINT
|
||||
} else if (!strcmp(argv[c], "-frames") && c + 1 < argc) {
|
||||
num_frames = atoi(argv[++c]); // NOLINT
|
||||
num_frames = atoi(argv[++c]); // NOLINT
|
||||
#ifdef _OPENMP
|
||||
} else if (!strcmp(argv[c], "-t") && c + 1 < argc) {
|
||||
num_threads = atoi(argv[++c]); // NOLINT
|
||||
num_threads = atoi(argv[++c]); // NOLINT
|
||||
#endif
|
||||
} else if (argv[c][0] == '-') {
|
||||
fprintf(stderr, "Unknown option. %s\n", argv[c]);
|
||||
@ -206,11 +211,9 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
int org_width, org_height;
|
||||
int rec_width, rec_height;
|
||||
bool org_res_avail = ExtractResolutionFromFilename(argv[fileindex_org],
|
||||
&org_width,
|
||||
&org_height);
|
||||
&org_width, &org_height);
|
||||
bool rec_res_avail = ExtractResolutionFromFilename(argv[fileindex_rec],
|
||||
&rec_width,
|
||||
&rec_height);
|
||||
&rec_width, &rec_height);
|
||||
if (org_res_avail) {
|
||||
if (rec_res_avail) {
|
||||
if ((org_width == rec_width) && (org_height == rec_height)) {
|
||||
@ -234,11 +237,15 @@ void ParseOptions(int argc, const char* argv[]) {
|
||||
}
|
||||
}
|
||||
|
||||
bool UpdateMetrics(uint8* ch_org, uint8* ch_rec,
|
||||
const int y_size, const int uv_size, const size_t total_size,
|
||||
bool UpdateMetrics(uint8* ch_org,
|
||||
uint8* ch_rec,
|
||||
const int y_size,
|
||||
const int uv_size,
|
||||
const size_t total_size,
|
||||
int number_of_frames,
|
||||
metric* cur_distortion_psnr,
|
||||
metric* distorted_frame, bool do_psnr) {
|
||||
metric* distorted_frame,
|
||||
bool do_psnr) {
|
||||
const int uv_offset = (do_swap_uv ? uv_size : 0);
|
||||
const uint8* const u_org = ch_org + y_size + uv_offset;
|
||||
const uint8* const u_rec = ch_rec + y_size;
|
||||
@ -247,11 +254,11 @@ bool UpdateMetrics(uint8* ch_org, uint8* ch_rec,
|
||||
if (do_psnr) {
|
||||
#ifdef HAVE_JPEG
|
||||
double y_err = static_cast<double>(
|
||||
libyuv::ComputeSumSquareError(ch_org, ch_rec, y_size));
|
||||
libyuv::ComputeSumSquareError(ch_org, ch_rec, y_size));
|
||||
double u_err = static_cast<double>(
|
||||
libyuv::ComputeSumSquareError(u_org, u_rec, uv_size));
|
||||
libyuv::ComputeSumSquareError(u_org, u_rec, uv_size));
|
||||
double v_err = static_cast<double>(
|
||||
libyuv::ComputeSumSquareError(v_org, v_rec, uv_size));
|
||||
libyuv::ComputeSumSquareError(v_org, v_rec, uv_size));
|
||||
#else
|
||||
double y_err = ComputeSumSquareError(ch_org, ch_rec, y_size);
|
||||
double u_err = ComputeSumSquareError(u_org, u_rec, uv_size);
|
||||
@ -265,17 +272,17 @@ bool UpdateMetrics(uint8* ch_org, uint8* ch_rec,
|
||||
distorted_frame->y = ComputePSNR(y_err, static_cast<double>(y_size));
|
||||
distorted_frame->u = ComputePSNR(u_err, static_cast<double>(uv_size));
|
||||
distorted_frame->v = ComputePSNR(v_err, static_cast<double>(uv_size));
|
||||
distorted_frame->all = ComputePSNR(total_err,
|
||||
static_cast<double>(total_size));
|
||||
distorted_frame->all =
|
||||
ComputePSNR(total_err, static_cast<double>(total_size));
|
||||
} else {
|
||||
distorted_frame->y = CalcSSIM(ch_org, ch_rec, image_width, image_height);
|
||||
distorted_frame->u = CalcSSIM(u_org, u_rec, (image_width + 1) / 2,
|
||||
(image_height + 1) / 2);
|
||||
distorted_frame->v = CalcSSIM(v_org, v_rec, (image_width + 1) / 2,
|
||||
(image_height + 1) / 2);
|
||||
distorted_frame->u =
|
||||
CalcSSIM(u_org, u_rec, (image_width + 1) / 2, (image_height + 1) / 2);
|
||||
distorted_frame->v =
|
||||
CalcSSIM(v_org, v_rec, (image_width + 1) / 2, (image_height + 1) / 2);
|
||||
distorted_frame->all =
|
||||
(distorted_frame->y + distorted_frame->u + distorted_frame->v)
|
||||
/ total_size;
|
||||
(distorted_frame->y + distorted_frame->u + distorted_frame->v) /
|
||||
total_size;
|
||||
distorted_frame->y /= y_size;
|
||||
distorted_frame->u /= uv_size;
|
||||
distorted_frame->v /= uv_size;
|
||||
@ -330,8 +337,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
// Open all files to compare to
|
||||
FILE** file_rec = new FILE* [num_rec];
|
||||
memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT
|
||||
FILE** file_rec = new FILE*[num_rec];
|
||||
memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT
|
||||
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
|
||||
file_rec[cur_rec] = fopen(argv[fileindex_rec + cur_rec], "rb");
|
||||
if (file_rec[cur_rec] == NULL) {
|
||||
@ -347,19 +354,18 @@ int main(int argc, const char* argv[]) {
|
||||
|
||||
const int y_size = image_width * image_height;
|
||||
const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
|
||||
const size_t total_size = y_size + 2 * uv_size; // NOLINT
|
||||
const size_t total_size = y_size + 2 * uv_size; // NOLINT
|
||||
#if defined(_MSC_VER)
|
||||
_fseeki64(file_org,
|
||||
static_cast<__int64>(num_skip_org) *
|
||||
static_cast<__int64>(total_size), SEEK_SET);
|
||||
_fseeki64(file_org, static_cast<__int64>(num_skip_org) *
|
||||
static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
#else
|
||||
fseek(file_org, num_skip_org * total_size, SEEK_SET);
|
||||
#endif
|
||||
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
|
||||
#if defined(_MSC_VER)
|
||||
_fseeki64(file_rec[cur_rec],
|
||||
static_cast<__int64>(num_skip_rec) *
|
||||
static_cast<__int64>(total_size),
|
||||
_fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) *
|
||||
static_cast<__int64>(total_size),
|
||||
SEEK_SET);
|
||||
#else
|
||||
fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
|
||||
@ -420,7 +426,7 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
int number_of_frames;
|
||||
for (number_of_frames = 0; ; ++number_of_frames) {
|
||||
for (number_of_frames = 0;; ++number_of_frames) {
|
||||
if (num_frames && number_of_frames >= num_frames)
|
||||
break;
|
||||
|
||||
@ -432,17 +438,11 @@ int main(int argc, const char* argv[]) {
|
||||
memcpy(ch_jpeg, ch_org, bytes_org);
|
||||
memset(ch_org, 0, total_size);
|
||||
|
||||
if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_org,
|
||||
ch_org,
|
||||
image_width,
|
||||
ch_org + y_size,
|
||||
(image_width + 1) / 2,
|
||||
if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_org, ch_org, image_width,
|
||||
ch_org + y_size, (image_width + 1) / 2,
|
||||
ch_org + y_size + uv_size,
|
||||
(image_width + 1) / 2,
|
||||
image_width,
|
||||
image_height,
|
||||
image_width,
|
||||
image_height)) {
|
||||
(image_width + 1) / 2, image_width,
|
||||
image_height, image_width, image_height)) {
|
||||
delete[] ch_jpeg;
|
||||
break;
|
||||
}
|
||||
@ -453,8 +453,8 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
|
||||
size_t bytes_rec = fread(ch_rec, sizeof(uint8),
|
||||
total_size, file_rec[cur_rec]);
|
||||
size_t bytes_rec =
|
||||
fread(ch_rec, sizeof(uint8), total_size, file_rec[cur_rec]);
|
||||
if (bytes_rec < total_size) {
|
||||
#ifdef HAVE_JPEG
|
||||
// Try parsing file as a jpeg.
|
||||
@ -462,17 +462,11 @@ int main(int argc, const char* argv[]) {
|
||||
memcpy(ch_jpeg, ch_rec, bytes_rec);
|
||||
memset(ch_rec, 0, total_size);
|
||||
|
||||
if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_rec,
|
||||
ch_rec,
|
||||
image_width,
|
||||
ch_rec + y_size,
|
||||
(image_width + 1) / 2,
|
||||
if (0 != libyuv::MJPGToI420(ch_jpeg, bytes_rec, ch_rec, image_width,
|
||||
ch_rec + y_size, (image_width + 1) / 2,
|
||||
ch_rec + y_size + uv_size,
|
||||
(image_width + 1) / 2,
|
||||
image_width,
|
||||
image_height,
|
||||
image_width,
|
||||
image_height)) {
|
||||
(image_width + 1) / 2, image_width,
|
||||
image_height, image_width, image_height)) {
|
||||
delete[] ch_jpeg;
|
||||
break;
|
||||
}
|
||||
@ -488,10 +482,8 @@ int main(int argc, const char* argv[]) {
|
||||
if (do_psnr) {
|
||||
metric distorted_frame;
|
||||
metric* cur_distortion_psnr = &distortion_psnr[cur_rec];
|
||||
bool ismin = UpdateMetrics(ch_org, ch_rec,
|
||||
y_size, uv_size, total_size,
|
||||
number_of_frames,
|
||||
cur_distortion_psnr,
|
||||
bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size,
|
||||
number_of_frames, cur_distortion_psnr,
|
||||
&distorted_frame, true);
|
||||
if (verbose) {
|
||||
printf("\t%10.6f", distorted_frame.y);
|
||||
@ -504,10 +496,8 @@ int main(int argc, const char* argv[]) {
|
||||
if (do_ssim) {
|
||||
metric distorted_frame;
|
||||
metric* cur_distortion_ssim = &distortion_ssim[cur_rec];
|
||||
bool ismin = UpdateMetrics(ch_org, ch_rec,
|
||||
y_size, uv_size, total_size,
|
||||
number_of_frames,
|
||||
cur_distortion_ssim,
|
||||
bool ismin = UpdateMetrics(ch_org, ch_rec, y_size, uv_size, total_size,
|
||||
number_of_frames, cur_distortion_ssim,
|
||||
&distorted_frame, false);
|
||||
if (verbose) {
|
||||
printf("\t%10.6f", distorted_frame.y);
|
||||
@ -543,24 +533,20 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
if (do_psnr) {
|
||||
const double global_psnr_y = ComputePSNR(
|
||||
cur_distortion_psnr->global_y,
|
||||
static_cast<double>(y_size) * number_of_frames);
|
||||
const double global_psnr_u = ComputePSNR(
|
||||
cur_distortion_psnr->global_u,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
const double global_psnr_v = ComputePSNR(
|
||||
cur_distortion_psnr->global_v,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
const double global_psnr_all = ComputePSNR(
|
||||
cur_distortion_psnr->global_all,
|
||||
static_cast<double>(total_size) * number_of_frames);
|
||||
printf("Global:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
global_psnr_y,
|
||||
global_psnr_u,
|
||||
global_psnr_v,
|
||||
global_psnr_all,
|
||||
number_of_frames);
|
||||
const double global_psnr_y =
|
||||
ComputePSNR(cur_distortion_psnr->global_y,
|
||||
static_cast<double>(y_size) * number_of_frames);
|
||||
const double global_psnr_u =
|
||||
ComputePSNR(cur_distortion_psnr->global_u,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
const double global_psnr_v =
|
||||
ComputePSNR(cur_distortion_psnr->global_v,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
const double global_psnr_all =
|
||||
ComputePSNR(cur_distortion_psnr->global_all,
|
||||
static_cast<double>(total_size) * number_of_frames);
|
||||
printf("Global:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", global_psnr_y,
|
||||
global_psnr_u, global_psnr_v, global_psnr_all, number_of_frames);
|
||||
if (show_name) {
|
||||
printf("\t%s", argv[fileindex_rec + cur_rec]);
|
||||
}
|
||||
@ -570,20 +556,14 @@ int main(int argc, const char* argv[]) {
|
||||
if (!quiet) {
|
||||
printf("Avg:");
|
||||
if (do_psnr) {
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
cur_distortion_psnr->y,
|
||||
cur_distortion_psnr->u,
|
||||
cur_distortion_psnr->v,
|
||||
cur_distortion_psnr->all,
|
||||
number_of_frames);
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", cur_distortion_psnr->y,
|
||||
cur_distortion_psnr->u, cur_distortion_psnr->v,
|
||||
cur_distortion_psnr->all, number_of_frames);
|
||||
}
|
||||
if (do_ssim) {
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
cur_distortion_ssim->y,
|
||||
cur_distortion_ssim->u,
|
||||
cur_distortion_ssim->v,
|
||||
cur_distortion_ssim->all,
|
||||
number_of_frames);
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", cur_distortion_ssim->y,
|
||||
cur_distortion_ssim->u, cur_distortion_ssim->v,
|
||||
cur_distortion_ssim->all, number_of_frames);
|
||||
}
|
||||
if (show_name) {
|
||||
printf("\t%s", argv[fileindex_rec + cur_rec]);
|
||||
@ -594,19 +574,15 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Min:");
|
||||
if (do_psnr) {
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
cur_distortion_psnr->min_y,
|
||||
cur_distortion_psnr->min_u,
|
||||
cur_distortion_psnr->min_v,
|
||||
cur_distortion_psnr->min_all,
|
||||
cur_distortion_psnr->min_frame);
|
||||
cur_distortion_psnr->min_y, cur_distortion_psnr->min_u,
|
||||
cur_distortion_psnr->min_v, cur_distortion_psnr->min_all,
|
||||
cur_distortion_psnr->min_frame);
|
||||
}
|
||||
if (do_ssim) {
|
||||
printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
cur_distortion_ssim->min_y,
|
||||
cur_distortion_ssim->min_u,
|
||||
cur_distortion_ssim->min_v,
|
||||
cur_distortion_ssim->min_all,
|
||||
cur_distortion_ssim->min_frame);
|
||||
cur_distortion_ssim->min_y, cur_distortion_ssim->min_u,
|
||||
cur_distortion_ssim->min_v, cur_distortion_ssim->min_all,
|
||||
cur_distortion_ssim->min_frame);
|
||||
}
|
||||
if (show_name) {
|
||||
printf("\t%s", argv[fileindex_rec + cur_rec]);
|
||||
@ -615,20 +591,20 @@ int main(int argc, const char* argv[]) {
|
||||
}
|
||||
|
||||
if (do_mse) {
|
||||
double global_mse_y = GetMSE(cur_distortion_psnr->global_y,
|
||||
static_cast<double>(y_size) * number_of_frames);
|
||||
double global_mse_u = GetMSE(cur_distortion_psnr->global_u,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
double global_mse_v = GetMSE(cur_distortion_psnr->global_v,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
double global_mse_all = GetMSE(cur_distortion_psnr->global_all,
|
||||
static_cast<double>(total_size) * number_of_frames);
|
||||
printf("MSE:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d",
|
||||
global_mse_y,
|
||||
global_mse_u,
|
||||
global_mse_v,
|
||||
global_mse_all,
|
||||
number_of_frames);
|
||||
double global_mse_y =
|
||||
GetMSE(cur_distortion_psnr->global_y,
|
||||
static_cast<double>(y_size) * number_of_frames);
|
||||
double global_mse_u =
|
||||
GetMSE(cur_distortion_psnr->global_u,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
double global_mse_v =
|
||||
GetMSE(cur_distortion_psnr->global_v,
|
||||
static_cast<double>(uv_size) * number_of_frames);
|
||||
double global_mse_all =
|
||||
GetMSE(cur_distortion_psnr->global_all,
|
||||
static_cast<double>(total_size) * number_of_frames);
|
||||
printf("MSE:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", global_mse_y,
|
||||
global_mse_u, global_mse_v, global_mse_all, number_of_frames);
|
||||
if (show_name) {
|
||||
printf("\t%s", argv[fileindex_rec + cur_rec]);
|
||||
}
|
||||
|
||||
161
util/ssim.cc
161
util/ssim.cc
@ -16,11 +16,11 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned int uint32; // NOLINT
|
||||
typedef unsigned short uint16; // NOLINT
|
||||
typedef unsigned int uint32; // NOLINT
|
||||
typedef unsigned short uint16; // NOLINT
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && !defined(__SSE2__) && \
|
||||
(defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)))
|
||||
(defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)))
|
||||
#define __SSE2__
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__)
|
||||
@ -38,22 +38,29 @@ enum { KERNEL = 3, KERNEL_SIZE = 2 * KERNEL + 1 };
|
||||
// The maximum value (11 x 11) must be less than 128 to avoid sign
|
||||
// problems during the calls to _mm_mullo_epi16().
|
||||
static const int K[KERNEL_SIZE] = {
|
||||
1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i)
|
||||
1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i)
|
||||
};
|
||||
static const double kiW[KERNEL + 1 + 1] = {
|
||||
1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j]
|
||||
1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j]
|
||||
1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j]
|
||||
1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j]
|
||||
1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j]
|
||||
1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j]
|
||||
1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j]
|
||||
1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j]
|
||||
1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j]
|
||||
1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j]
|
||||
};
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(__SSE2__)
|
||||
|
||||
#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product
|
||||
#define MAKE_WEIGHT(L) \
|
||||
{ { { PWEIGHT(L, 0), PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), \
|
||||
PWEIGHT(L, 4), PWEIGHT(L, 5), PWEIGHT(L, 6), 0 } } }
|
||||
#define PWEIGHT(A, B) static_cast<uint16>(K[(A)] * K[(B)]) // weight product
|
||||
#define MAKE_WEIGHT(L) \
|
||||
{ \
|
||||
{ \
|
||||
{ \
|
||||
PWEIGHT(L, 0) \
|
||||
, PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), PWEIGHT(L, 4), \
|
||||
PWEIGHT(L, 5), PWEIGHT(L, 6), 0 \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// We need this union trick to be able to initialize constant static __m128i
|
||||
// values. We can't call _mm_set_epi16() for static compile-time initialization.
|
||||
@ -62,32 +69,36 @@ static const struct {
|
||||
uint16 i16_[8];
|
||||
__m128i m_;
|
||||
} values_;
|
||||
} W0 = MAKE_WEIGHT(0),
|
||||
W1 = MAKE_WEIGHT(1),
|
||||
W2 = MAKE_WEIGHT(2),
|
||||
} W0 = MAKE_WEIGHT(0), W1 = MAKE_WEIGHT(1), W2 = MAKE_WEIGHT(2),
|
||||
W3 = MAKE_WEIGHT(3);
|
||||
// ... the rest is symmetric.
|
||||
// ... the rest is symmetric.
|
||||
#undef MAKE_WEIGHT
|
||||
#undef PWEIGHT
|
||||
#endif
|
||||
|
||||
// Common final expression for SSIM, once the weighted sums are known.
|
||||
static double FinalizeSSIM(double iw, double xm, double ym,
|
||||
double xxm, double xym, double yym) {
|
||||
static double FinalizeSSIM(double iw,
|
||||
double xm,
|
||||
double ym,
|
||||
double xxm,
|
||||
double xym,
|
||||
double yym) {
|
||||
const double iwx = xm * iw;
|
||||
const double iwy = ym * iw;
|
||||
double sxx = xxm * iw - iwx * iwx;
|
||||
double syy = yym * iw - iwy * iwy;
|
||||
// small errors are possible, due to rounding. Clamp to zero.
|
||||
if (sxx < 0.) sxx = 0.;
|
||||
if (syy < 0.) syy = 0.;
|
||||
if (sxx < 0.)
|
||||
sxx = 0.;
|
||||
if (syy < 0.)
|
||||
syy = 0.;
|
||||
const double sxsy = sqrt(sxx * syy);
|
||||
const double sxy = xym * iw - iwx * iwy;
|
||||
static const double C11 = (0.01 * 0.01) * (255 * 255);
|
||||
static const double C22 = (0.03 * 0.03) * (255 * 255);
|
||||
static const double C33 = (0.015 * 0.015) * (255 * 255);
|
||||
const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
|
||||
const double c = (2. * sxsy + C22) / (sxx + syy + C22);
|
||||
const double c = (2. * sxsy + C22) / (sxx + syy + C22);
|
||||
const double s = (sxy + C33) / (sxsy + C33);
|
||||
return l * c * s;
|
||||
}
|
||||
@ -98,15 +109,21 @@ static double FinalizeSSIM(double iw, double xm, double ym,
|
||||
// Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1)
|
||||
// with a diff of 255, squared. The maximum error is thus 0x4388241,
|
||||
// which fits into 32-bit integers.
|
||||
double GetSSIM(const uint8 *org, const uint8 *rec,
|
||||
int xo, int yo, int W, int H, int stride) {
|
||||
double GetSSIM(const uint8* org,
|
||||
const uint8* rec,
|
||||
int xo,
|
||||
int yo,
|
||||
int W,
|
||||
int H,
|
||||
int stride) {
|
||||
uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
|
||||
org += (yo - KERNEL) * stride;
|
||||
org += (xo - KERNEL);
|
||||
rec += (yo - KERNEL) * stride;
|
||||
rec += (xo - KERNEL);
|
||||
for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) {
|
||||
if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) continue;
|
||||
if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H))
|
||||
continue;
|
||||
const int Wy = K[y_];
|
||||
for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) {
|
||||
const int Wxy = Wy * K[x_];
|
||||
@ -114,8 +131,8 @@ double GetSSIM(const uint8 *org, const uint8 *rec,
|
||||
const int org_x = org[x_];
|
||||
const int rec_x = rec[x_];
|
||||
ws += Wxy;
|
||||
xm += Wxy * org_x;
|
||||
ym += Wxy * rec_x;
|
||||
xm += Wxy * org_x;
|
||||
ym += Wxy * rec_x;
|
||||
xxm += Wxy * org_x * org_x;
|
||||
xym += Wxy * org_x * rec_x;
|
||||
yym += Wxy * rec_x * rec_x;
|
||||
@ -125,8 +142,11 @@ double GetSSIM(const uint8 *org, const uint8 *rec,
|
||||
return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym);
|
||||
}
|
||||
|
||||
double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
int xo, int yo, int stride,
|
||||
double GetSSIMFullKernel(const uint8* org,
|
||||
const uint8* rec,
|
||||
int xo,
|
||||
int yo,
|
||||
int stride,
|
||||
double area_weight) {
|
||||
uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
|
||||
|
||||
@ -161,8 +181,8 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
const int ll2 = rec[dy2 - x];
|
||||
const int lr2 = rec[dy2 + x];
|
||||
|
||||
xm += Wxy * (ul1 + ur1 + ll1 + lr1);
|
||||
ym += Wxy * (ul2 + ur2 + ll2 + lr2);
|
||||
xm += Wxy * (ul1 + ur1 + ll1 + lr1);
|
||||
ym += Wxy * (ul2 + ur2 + ll2 + lr2);
|
||||
xxm += Wxy * (ul1 * ul1 + ur1 * ur1 + ll1 * ll1 + lr1 * lr1);
|
||||
xym += Wxy * (ul1 * ul2 + ur1 * ur2 + ll1 * ll2 + lr1 * lr2);
|
||||
yym += Wxy * (ul2 * ul2 + ur2 * ur2 + ll2 * ll2 + lr2 * lr2);
|
||||
@ -189,8 +209,8 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
const int l2 = rec[-y];
|
||||
const int r2 = rec[y];
|
||||
|
||||
xm += Wxy * (u1 + d1 + l1 + r1);
|
||||
ym += Wxy * (u2 + d2 + l2 + r2);
|
||||
xm += Wxy * (u1 + d1 + l1 + r1);
|
||||
ym += Wxy * (u2 + d2 + l2 + r2);
|
||||
xxm += Wxy * (u1 * u1 + d1 * d1 + l1 * l1 + r1 * r1);
|
||||
xym += Wxy * (u1 * u2 + d1 * d2 + l1 * l2 + r1 * r2);
|
||||
yym += Wxy * (u2 * u2 + d2 * d2 + l2 * l2 + r2 * r2);
|
||||
@ -201,13 +221,13 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
const int s1 = org[0];
|
||||
const int s2 = rec[0];
|
||||
|
||||
xm += Wxy * s1;
|
||||
ym += Wxy * s2;
|
||||
xm += Wxy * s1;
|
||||
ym += Wxy * s2;
|
||||
xxm += Wxy * s1 * s1;
|
||||
xym += Wxy * s1 * s2;
|
||||
yym += Wxy * s2 * s2;
|
||||
|
||||
#else // __SSE2__
|
||||
#else // __SSE2__
|
||||
|
||||
org += (yo - KERNEL) * stride + (xo - KERNEL);
|
||||
rec += (yo - KERNEL) * stride + (xo - KERNEL);
|
||||
@ -221,29 +241,31 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
|
||||
// Read 8 pixels at line #L, and convert to 16bit, perform weighting
|
||||
// and accumulate.
|
||||
#define LOAD_LINE_PAIR(L, WEIGHT) do { \
|
||||
const __m128i v0 = \
|
||||
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L) * stride)); \
|
||||
const __m128i v1 = \
|
||||
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L) * stride)); \
|
||||
const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \
|
||||
const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \
|
||||
const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \
|
||||
const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \
|
||||
x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \
|
||||
y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \
|
||||
x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \
|
||||
y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \
|
||||
xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \
|
||||
xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \
|
||||
yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \
|
||||
} while (0)
|
||||
#define LOAD_LINE_PAIR(L, WEIGHT) \
|
||||
do { \
|
||||
const __m128i v0 = \
|
||||
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L)*stride)); \
|
||||
const __m128i v1 = \
|
||||
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L)*stride)); \
|
||||
const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \
|
||||
const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \
|
||||
const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \
|
||||
const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \
|
||||
x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \
|
||||
y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \
|
||||
x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \
|
||||
y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \
|
||||
xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \
|
||||
xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \
|
||||
yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \
|
||||
} while (0)
|
||||
|
||||
#define ADD_AND_STORE_FOUR_EPI32(M, OUT) do { \
|
||||
uint32 tmp[4]; \
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \
|
||||
(OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \
|
||||
} while (0)
|
||||
#define ADD_AND_STORE_FOUR_EPI32(M, OUT) \
|
||||
do { \
|
||||
uint32 tmp[4]; \
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \
|
||||
(OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \
|
||||
} while (0)
|
||||
|
||||
LOAD_LINE_PAIR(0, W0);
|
||||
LOAD_LINE_PAIR(1, W1);
|
||||
@ -266,10 +288,14 @@ double GetSSIMFullKernel(const uint8 *org, const uint8 *rec,
|
||||
return FinalizeSSIM(area_weight, xm, ym, xxm, xym, yym);
|
||||
}
|
||||
|
||||
static int start_max(int x, int y) { return (x > y) ? x : y; }
|
||||
static int start_max(int x, int y) {
|
||||
return (x > y) ? x : y;
|
||||
}
|
||||
|
||||
double CalcSSIM(const uint8 *org, const uint8 *rec,
|
||||
const int image_width, const int image_height) {
|
||||
double CalcSSIM(const uint8* org,
|
||||
const uint8* rec,
|
||||
const int image_width,
|
||||
const int image_height) {
|
||||
double SSIM = 0.;
|
||||
const int KERNEL_Y = (image_height < KERNEL) ? image_height : KERNEL;
|
||||
const int KERNEL_X = (image_width < KERNEL) ? image_width : KERNEL;
|
||||
@ -284,7 +310,7 @@ double CalcSSIM(const uint8 *org, const uint8 *rec,
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: SSIM)
|
||||
#pragma omp parallel for reduction(+ : SSIM)
|
||||
#endif
|
||||
for (int j = KERNEL_Y; j < image_height - KERNEL_Y; ++j) {
|
||||
for (int i = 0; i < KERNEL_X; ++i) {
|
||||
@ -302,8 +328,8 @@ double CalcSSIM(const uint8 *org, const uint8 *rec,
|
||||
// NOTE: we could use similar method for the left-most pixels too.
|
||||
const int kScratchWidth = 8;
|
||||
const int kScratchStride = kScratchWidth + KERNEL + 1;
|
||||
uint8 scratch_org[KERNEL_SIZE * kScratchStride] = { 0 };
|
||||
uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = { 0 };
|
||||
uint8 scratch_org[KERNEL_SIZE * kScratchStride] = {0};
|
||||
uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = {0};
|
||||
|
||||
for (int k = 0; k < KERNEL_SIZE; ++k) {
|
||||
const int offset =
|
||||
@ -311,9 +337,9 @@ double CalcSSIM(const uint8 *org, const uint8 *rec,
|
||||
memcpy(scratch_org + k * kScratchStride, org + offset, kScratchWidth);
|
||||
memcpy(scratch_rec + k * kScratchStride, rec + offset, kScratchWidth);
|
||||
}
|
||||
for (int k = 0; k <= KERNEL_X + 1; ++k) {
|
||||
SSIM += GetSSIMFullKernel(scratch_org, scratch_rec,
|
||||
KERNEL + k, KERNEL, kScratchStride, kiW[k]);
|
||||
for (int k = 0; k <= KERNEL_X + 1; ++k) {
|
||||
SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, KERNEL + k, KERNEL,
|
||||
kScratchStride, kiW[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -333,4 +359,3 @@ double CalcLSSIM(double ssim) {
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
|
||||
@ -24,8 +24,10 @@ typedef unsigned char uint8;
|
||||
#define UINT8_TYPE_DEFINED
|
||||
#endif
|
||||
|
||||
double CalcSSIM(const uint8* org, const uint8* rec,
|
||||
const int image_width, const int image_height);
|
||||
double CalcSSIM(const uint8* org,
|
||||
const uint8* rec,
|
||||
const int image_width,
|
||||
const int image_height);
|
||||
|
||||
double CalcLSSIM(double ssim);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user