Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2025-12-06 16:56:55 +08:00)
ARGBToJ444 use 256 for fixed point scale UV

- use negative coefficients for UV to allow -128
- change shift to truncate instead of round for UV
- adapt all row_gcc RGB to UV into matrix functions
- add -DLIBYUV_ENABLE_ROWWIN to allow clang on Windows to use row_win.cc

Bug: 381138208
Change-Id: I6016062c859faf147a8a2cdea6c09976cbf2963c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6277710
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: James Zern <jzern@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Parent: 5257ba4db0
Commit: c060118bea
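The headline change is easiest to see in the C reference functions touched in the diff below: the JPEG (full-range) U/V math moves from a 127/84/43 scale with a rounding bias (0x8080) to a 128/85/43 (scale 256) form with a truncating bias (0x8000). A minimal stand-alone sketch comparing the two, using the exact constants from the diff (the _old/_new function names are mine, not libyuv API):

#include <stdint.h>
#include <stdio.h>

/* Old form: 127/84/43 coefficients, +0x80 rounding term folded into 0x8080. */
static uint8_t RGBToUJ_old(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((127 * b - 84 * g - 43 * r + 0x8080) >> 8);
}

/* New form: 128/85/43 coefficients (scale 256), 0x8000 center bias only,
 * so the shift truncates instead of rounding. */
static uint8_t RGBToUJ_new(uint8_t r, uint8_t g, uint8_t b) {
  return (uint8_t)((128 * b - 85 * g - 43 * r + 0x8000) >> 8);
}

int main(void) {
  /* Saturated blue sits at the top of the U range; mid grey at the 128 center. */
  printf("blue r=0 g=0 b=255    : old U=%u new U=%u\n",
         RGBToUJ_old(0, 0, 255), RGBToUJ_new(0, 0, 255));
  printf("grey r=128 g=128 b=128: old U=%u new U=%u\n",
         RGBToUJ_old(128, 128, 128), RGBToUJ_new(128, 128, 128));
  return 0;
}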
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: https://chromium.googlesource.com/libyuv/libyuv/
-Version: 1905
+Version: 1906
 License: BSD
 License File: LICENSE
 Shipped: yes
@@ -20,8 +20,9 @@ extern "C" {
 #endif

 // The following are available for Visual C and GCC:
 #if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
+    ((defined(__x86_64__) && !defined(LIBYUV_ENABLE_ROWWIN)) || \
+     defined(__i386__) || defined(_M_IX86))
 #define HAS_HASHDJB2_SSE41
 #define HAS_SUMSQUAREERROR_SSE2
 #define HAS_HAMMINGDISTANCE_SSE42

@@ -36,13 +37,16 @@ extern "C" {
 #endif

 // The following are available for GCC and clangcl:
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_HAMMINGDISTANCE_SSSE3
 #endif

 // The following are available for GCC and clangcl:
 #if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
-    (defined(__x86_64__) || defined(__i386__))
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_HAMMINGDISTANCE_AVX2
 #endif

@@ -44,21 +44,24 @@ extern "C" {
 #endif  // __clang__

 // GCC >= 4.7.0 required for AVX2.
-#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if defined(__GNUC__) && !defined(LIBYUV_ENABLE_ROWWIN) && \
+    (defined(__x86_64__) || defined(__i386__))
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
 #define GCC_HAS_AVX2 1
 #endif  // GNUC >= 4.7
 #endif  // __GNUC__

 // clang >= 3.4.0 required for AVX2.
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if defined(__clang__) && !defined(LIBYUV_ENABLE_ROWWIN) && \
+    (defined(__x86_64__) || defined(__i386__))
 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
 #define CLANG_HAS_AVX2 1
 #endif  // clang >= 3.4
 #endif  // __clang__

 // clang >= 6.0.0 required for AVX512.
-#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if defined(__clang__) && !defined(LIBYUV_ENABLE_ROWWIN) && \
+    (defined(__x86_64__) || defined(__i386__))
 // clang in xcode follows a different versioning scheme.
 // TODO(fbarchard): fix xcode 9 ios b/789.
 #if (__clang_major__ >= 7) && !defined(__APPLE__)

@@ -67,8 +70,9 @@ extern "C" {
 #endif  // __clang__

 // Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
-    _MSC_VER >= 1700
+#if defined(_M_IX86) && \
+    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN)) && \
+    defined(_MSC_VER) && _MSC_VER >= 1700
 #define VISUALC_HAS_AVX2 1
 #endif  // VisualStudio >= 2012

@@ -38,8 +38,10 @@ extern "C" {
 #endif
 #endif
 // The following are available on all x86 platforms:
 #if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+    (defined(_M_IX86) || \
+     (defined(__x86_64__) && !defined(LIBYUV_ENABLE_ROWWIN)) || \
+     defined(__i386__))
 #define HAS_ARGBAFFINEROW_SSE2
 #endif

@@ -20,6 +20,7 @@ extern "C" {
 #endif

 // The following are available for Visual C 32 bit:
+// TODO - port to clangcl on rotate_win
 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
     !defined(__clang__)
 #define HAS_TRANSPOSEWX8_SSSE3

@@ -27,14 +28,17 @@ extern "C" {
 #endif

 // The following are available for GCC 32 or 64 bit:
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__i386__) || defined(__x86_64__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_TRANSPOSEWX8_SSSE3
 #define HAS_TRANSPOSE4X4_32_SSE2
 #define HAS_TRANSPOSE4X4_32_AVX2
 #endif

 // The following are available for 64 bit GCC:
-#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
+#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_TRANSPOSEWX8_FAST_SSSE3
 #define HAS_TRANSPOSEUVWX8_SSE2
 #endif

@@ -23,10 +23,11 @@ extern "C" {
 #endif

 // The following are available on all x86 platforms:
 #if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+    (defined(_M_IX86) || \
+     (defined(__x86_64__) && !defined(LIBYUV_ENABLE_ROWWIN)) || \
+     defined(__i386__))
 // Conversions:
-#define HAS_ABGRTOYROW_SSSE3
 #define HAS_ARGB1555TOARGBROW_SSE2
 #define HAS_ARGB4444TOARGBROW_SSE2
 #define HAS_ARGBEXTRACTALPHAROW_SSE2
@@ -38,9 +39,6 @@ extern "C" {
 #define HAS_ARGBTORGB24ROW_SSSE3
 #define HAS_ARGBTORGB565DITHERROW_SSE2
 #define HAS_ARGBTORGB565ROW_SSE2
-#define HAS_ARGBTOYJROW_SSSE3
-#define HAS_ARGBTOYROW_SSSE3
-#define HAS_BGRATOYROW_SSSE3
 #define HAS_COPYROW_ERMS
 #define HAS_COPYROW_SSE2
 #define HAS_H422TOARGBROW_SSSE3

@@ -68,13 +66,8 @@ extern "C" {
 #define HAS_NV21TORGB24ROW_SSSE3
 #define HAS_RAWTOARGBROW_SSSE3
 #define HAS_RAWTORGB24ROW_SSSE3
-#define HAS_RAWTOYJROW_SSSE3
-#define HAS_RAWTOYROW_SSSE3
 #define HAS_RGB24TOARGBROW_SSSE3
-#define HAS_RGB24TOYJROW_SSSE3
-#define HAS_RGB24TOYROW_SSSE3
 #define HAS_RGB565TOARGBROW_SSE2
-#define HAS_RGBATOYROW_SSSE3
 #define HAS_SETROW_ERMS
 #define HAS_SETROW_X86
 #define HAS_SPLITUVROW_SSE2

@@ -88,7 +81,6 @@ extern "C" {
 #define HAS_YUY2TOYROW_SSE2
 #if !defined(LIBYUV_BIT_EXACT)
 #define HAS_ABGRTOUVROW_SSSE3
-#define HAS_ARGBTOUVJROW_SSSE3
 #define HAS_ARGBTOUVROW_SSSE3
 #define HAS_BGRATOUVROW_SSSE3
 #define HAS_RGBATOUVROW_SSSE3

@@ -124,13 +116,24 @@ extern "C" {

 // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
 // caveat: clangcl uses row_win.cc which works.
-#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
-    defined(_MSC_VER)
+#if (defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
+     defined(_MSC_VER)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 // TODO(fbarchard): fix build error on android_full_debug=1
 // https://code.google.com/p/libyuv/issues/detail?id=517
 #define HAS_I422ALPHATOARGBROW_SSSE3
 #define HAS_I444ALPHATOARGBROW_SSSE3
 #endif
+#if (defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
+     defined(_MSC_VER)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
+// TODO(fbarchard): fix build error on android_full_debug=1
+// https://code.google.com/p/libyuv/issues/detail?id=517
+// TODO(fbarchard): fix LIBYUV_ENABLE_ROWWIN with clang
+#define HAS_I422ALPHATOARGBROW_AVX2
+#define HAS_I444ALPHATOARGBROW_AVX2
+#endif

 #endif

 // The following are available on all x86 platforms, but

@@ -145,8 +148,6 @@ extern "C" {
 #define HAS_ARGBPOLYNOMIALROW_AVX2
 #define HAS_ARGBSHUFFLEROW_AVX2
 #define HAS_ARGBTORGB565DITHERROW_AVX2
-#define HAS_ARGBTOYJROW_AVX2
-#define HAS_ARGBTOYROW_AVX2
 #define HAS_COPYROW_AVX
 #define HAS_H422TOARGBROW_AVX2
 #define HAS_HALFFLOATROW_AVX2

@@ -167,8 +168,6 @@ extern "C" {
 #define HAS_NV12TORGB565ROW_AVX2
 #define HAS_NV21TOARGBROW_AVX2
 #define HAS_NV21TORGB24ROW_AVX2
-#define HAS_RAWTOYJROW_AVX2
-#define HAS_RGB24TOYJROW_AVX2
 #define HAS_SPLITUVROW_AVX2
 #define HAS_UYVYTOARGBROW_AVX2
 #define HAS_UYVYTOUV422ROW_AVX2

@@ -179,10 +178,6 @@ extern "C" {
 #define HAS_YUY2TOUVROW_AVX2
 #define HAS_YUY2TOYROW_AVX2
 // #define HAS_HALFFLOATROW_F16C  // Enable to test half float cast
-#if !defined(LIBYUV_BIT_EXACT)
-#define HAS_ARGBTOUVJROW_AVX2
-#define HAS_ARGBTOUVROW_AVX2
-#endif

 // Effects:
 #define HAS_ARGBADDROW_AVX2

@@ -190,14 +185,6 @@ extern "C" {
 // #define HAS_ARGBMULTIPLYROW_AVX2
 #define HAS_ARGBSUBTRACTROW_AVX2
 #define HAS_BLENDPLANEROW_AVX2
-
-#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
-    defined(_MSC_VER)
-// TODO(fbarchard): fix build error on android_full_debug=1
-// https://code.google.com/p/libyuv/issues/detail?id=517
-#define HAS_I422ALPHATOARGBROW_AVX2
-#define HAS_I444ALPHATOARGBROW_AVX2
-#endif
 #endif

 // The following are available for AVX2 Visual C 32 bit:
@@ -224,7 +211,10 @@ extern "C" {

 // The following are available for gcc/clang x86 platforms:
 // TODO(fbarchard): Port to Visual C
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
+#define HAS_RAWTOYJROW_SSSE3
 #define HAS_AB64TOARGBROW_SSSE3
 #define HAS_ABGRTOAR30ROW_SSSE3
 #define HAS_ABGRTOYJROW_SSSE3

@@ -272,8 +262,20 @@ extern "C" {
 #define HAS_SPLITXRGBROW_SSSE3
 #define HAS_SWAPUVROW_SSSE3
 #define HAS_YUY2TONVUVROW_SSE2
+// TODO: port row_win to use 8 bit coefficients.
+#define HAS_ARGBTOYJROW_SSSE3
+#define HAS_ARGBTOYROW_SSSE3
+#define HAS_BGRATOYROW_SSSE3
+#define HAS_RAWTOYROW_SSSE3
+#define HAS_ABGRTOYROW_SSSE3
+#define HAS_RGB24TOYJROW_SSSE3
+#define HAS_RGB24TOYROW_SSSE3
+#define HAS_RGBATOYROW_SSSE3
+
 #if !defined(LIBYUV_BIT_EXACT)
+// TODO: adjust row_win to use 8 bit negative coefficients.
 #define HAS_ABGRTOUVJROW_SSSE3
+#define HAS_ARGBTOUVJROW_SSSE3
 #endif

 #if defined(__x86_64__) || !defined(__pic__)

@@ -286,9 +288,15 @@ extern "C" {

 // The following are available for AVX2 gcc/clang x86 platforms:
 // TODO(fbarchard): Port to Visual C
 #if !defined(LIBYUV_DISABLE_X86) && \
     (defined(__x86_64__) || defined(__i386__)) && \
-    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
+    (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
+#define HAS_RAWTOYJROW_AVX2
+#define HAS_RGB24TOYJROW_AVX2
+
+#define HAS_ARGBTOYJROW_AVX2
+#define HAS_ARGBTOYROW_AVX2
 #define HAS_AB64TOARGBROW_AVX2
 #define HAS_ABGRTOAR30ROW_AVX2
 #define HAS_ABGRTOYJROW_AVX2

@@ -345,6 +353,8 @@ extern "C" {
 #if !defined(LIBYUV_BIT_EXACT)
 #define HAS_ABGRTOUVJROW_AVX2
 #define HAS_ABGRTOUVROW_AVX2
+#define HAS_ARGBTOUVJROW_AVX2
+#define HAS_ARGBTOUVROW_AVX2
 #endif

 #if defined(__x86_64__) || !defined(__pic__)

@@ -358,8 +368,9 @@ extern "C" {
 // The following are available for AVX512 clang x86 platforms:
 // TODO(fbarchard): Port to GCC and Visual C
 // TODO(b/42280744): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI.
 #if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512)
+    (defined(__x86_64__) || defined(__i386__)) && defined(CLANG_HAS_AVX512) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_COPYROW_AVX512BW
 #define HAS_ARGBTORGB24ROW_AVX512VBMI
 #define HAS_CONVERT16TO8ROW_AVX512BW

@@ -21,8 +21,10 @@ extern "C" {
 #endif

 // The following are available on all x86 platforms:
 #if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
+    (defined(_M_IX86) || \
+     (defined(__x86_64__) && !defined(LIBYUV_ENABLE_ROWWIN)) || \
+     defined(__i386__))
 #define HAS_FIXEDDIV1_X86
 #define HAS_FIXEDDIV_X86
 #define HAS_SCALEADDROW_SSE2

@@ -41,7 +43,9 @@ extern "C" {

 // The following are available for gcc/clang x86 platforms:
 // TODO(fbarchard): Port to Visual C
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 #define HAS_SCALEUVROWDOWN2BOX_SSSE3
 #define HAS_SCALEROWUP2_LINEAR_SSE2
 #define HAS_SCALEROWUP2_LINEAR_SSSE3
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1905
+#define LIBYUV_VERSION 1906

 #endif  // INCLUDE_LIBYUV_VERSION_H_

@@ -19,7 +19,9 @@ extern "C" {
 #endif

 // This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)

 // "memory" clobber prevents the reads from being removed

@@ -116,7 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
   uint32_t hash = seed;
   const uint32_t c16 = 0x92d9e201;  // 33^16
   uint32_t tmp, tmp2;
   asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
       "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"

       // count is always a multiple of 16.

@@ -23,8 +23,8 @@ extern "C" {
 #endif

 // This module is for 32 bit Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
-    !defined(__clang__) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_IX86) && \
+    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN))

 uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                                const uint8_t* src_b,
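Taken together, the guard edits above and below converge on one pattern: GCC/clang-style x86-64 rows are compiled only when LIBYUV_ENABLE_ROWWIN is not defined, while the Visual C paths now accept clang when it is defined, which is what lets clang on Windows build the row_win.cc code. A stand-alone sketch of that pattern under those assumptions (the USING_* helper macros and the probe program are mine; only the libyuv macro names come from the diff):

#include <stdio.h>

/* Hypothetical probe translation unit, not part of libyuv. */
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \
    !defined(LIBYUV_ENABLE_ROWWIN)
#define USING_GCC_STYLE_ROWS 1
#endif

#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_IX86) && \
    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN))
#define USING_WIN_STYLE_ROWS 1
#endif

int main(void) {
#if defined(USING_GCC_STYLE_ROWS)
  puts("row_gcc.cc-style kernels selected");
#elif defined(USING_WIN_STYLE_ROWS)
  puts("row_win.cc-style kernels selected");
#else
  puts("portable C rows only");
#endif
  return 0;
}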
@@ -4034,16 +4034,22 @@ int RGB565ToI420(const uint8_t* src_rgb565,
 // Neon version does direct RGB565 to YUV.
 #if defined(HAS_RGB565TOYROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
     RGB565ToYRow = RGB565ToYRow_Any_NEON;
     if (IS_ALIGNED(width, 16)) {
       RGB565ToYRow = RGB565ToYRow_NEON;
+    }
+  }
+#endif
+// Neon version does direct RGB565 to YUV.
+#if defined(HAS_RGB565TOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
       RGB565ToUVRow = RGB565ToUVRow_NEON;
     }
   }
+#endif
 // MSA version does direct RGB565 to YUV.
-#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) || \
-       defined(HAS_RGB565TOYROW_LASX))
 #if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA)
   if (TestCpuFlag(kCpuHasMSA)) {
     RGB565ToUVRow = RGB565ToUVRow_Any_MSA;

@@ -4075,7 +4081,6 @@ int RGB565ToI420(const uint8_t* src_rgb565,
   }
 #endif
 // Other platforms do intermediate conversion from RGB565 to ARGB.
-#else
 #if defined(HAS_RGB565TOARGBROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;

@@ -4100,14 +4105,6 @@ int RGB565ToI420(const uint8_t* src_rgb565,
     }
   }
 #endif
-#if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-    }
-  }
-#endif
 #if defined(HAS_ARGBTOYROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ARGBToYRow = ARGBToYRow_Any_AVX2;

@@ -4116,6 +4113,14 @@ int RGB565ToI420(const uint8_t* src_rgb565,
     }
   }
 #endif
+#if defined(HAS_ARGBTOUVROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
+    }
+  }
+#endif
 #if defined(HAS_ARGBTOUVROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2)) {
     ARGBToUVRow = ARGBToUVRow_Any_AVX2;

@@ -4123,7 +4128,6 @@ int RGB565ToI420(const uint8_t* src_rgb565,
       ARGBToUVRow = ARGBToUVRow_AVX2;
     }
   }
-#endif
 #endif
   {
 #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
@@ -4214,18 +4218,22 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
 // Neon version does direct ARGB1555 to YUV.
 #if defined(HAS_ARGB1555TOYROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
     ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
     if (IS_ALIGNED(width, 8)) {
       ARGB1555ToYRow = ARGB1555ToYRow_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
-      }
     }
   }
+#endif
+#if defined(HAS_ARGB1555TOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+    }
+  }
+#endif
+
 // MSA version does direct ARGB1555 to YUV.
-#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) || \
-       defined(HAS_ARGB1555TOYROW_LASX))
 #if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA)
   if (TestCpuFlag(kCpuHasMSA)) {
     ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA;

@@ -4256,8 +4264,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
     }
   }
 #endif
+
 // Other platforms do intermediate conversion from ARGB1555 to ARGB.
-#else
 #if defined(HAS_ARGB1555TOARGBROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;

@@ -4305,7 +4313,6 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
       ARGBToUVRow = ARGBToUVRow_AVX2;
     }
   }
-#endif
 #endif
   {
 #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \

@@ -4398,17 +4405,20 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
 // Neon version does direct ARGB4444 to YUV.
 #if defined(HAS_ARGB4444TOYROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
     ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
     if (IS_ALIGNED(width, 8)) {
       ARGB4444ToYRow = ARGB4444ToYRow_NEON;
-      if (IS_ALIGNED(width, 16)) {
-        ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
-      }
     }
   }
-// Other platforms do intermediate conversion from ARGB4444 to ARGB.
-#else
+#endif
+#if defined(HAS_ARGB4444TOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_ARGB4444TOARGBROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;

@@ -4520,7 +4530,6 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
       ARGBToUVRow = ARGBToUVRow_LASX;
     }
   }
-#endif
 #endif

   {
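The three ToI420 hunks above all make the same structural move: the Y-row and UV-row NEON overrides, previously nested in one block, are split into independent #if blocks keyed on separate HAS_*TOYROW_NEON / HAS_*TOUVROW_NEON macros. A condensed, stand-alone sketch of the resulting dispatch shape (TestCpuFlag, kCpuHasNEON, IS_ALIGNED and the row-function names follow the diff; the stub bodies and the skipped _Any_ fallback step are my simplifications):

#include <stdio.h>

#define HAS_RGB565TOYROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define IS_ALIGNED(v, a) (((v) & ((a) - 1)) == 0)

enum { kCpuHasNEON = 1 };
static int TestCpuFlag(int flag) { (void)flag; return 1; } /* pretend NEON CPU */

static void RGB565ToYRow_C(void) { puts("Y row:  C"); }
static void RGB565ToYRow_NEON(void) { puts("Y row:  NEON"); }
static void RGB565ToUVRow_C(void) { puts("UV row: C"); }
static void RGB565ToUVRow_NEON(void) { puts("UV row: NEON"); }

int main(void) {
  int width = 640;
  void (*RGB565ToYRow)(void) = RGB565ToYRow_C;
  void (*RGB565ToUVRow)(void) = RGB565ToUVRow_C;
  /* Y and UV overrides now live in separate blocks, so either can be
   * enabled without the other. */
#if defined(HAS_RGB565TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    RGB565ToYRow = RGB565ToYRow_NEON;
  }
#endif
#if defined(HAS_RGB565TOUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    RGB565ToUVRow = RGB565ToUVRow_NEON;
  }
#endif
  RGB565ToYRow();
  RGB565ToUVRow();
  return 0;
}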
@@ -17,7 +17,9 @@ extern "C" {
 #endif

 // This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)

 // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
 #if defined(HAS_TRANSPOSEWX8_SSSE3)

@@ -17,8 +17,8 @@ extern "C" {
 #endif

 // This module is for 32 bit Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
-    !defined(__clang__) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_IX86) && \
+    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN))

 __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
                                           int src_stride,

@@ -36,14 +36,6 @@ extern "C" {
 // LIBYUV_UNLIMITED_BT709
 // LIBYUV_UNLIMITED_BT2020

-// The following macro from row_win makes the C code match the row_win code,
-// which is 7 bit fixed point for ARGBToI420:
-#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
-    defined(_MSC_VER) && !defined(__clang__) && \
-    (defined(_M_IX86) || defined(_M_X64))
-#define LIBYUV_RGB7 1
-#endif
-
 #if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
                                    defined(__i386__) || defined(_M_IX86))
 #define LIBYUV_ARGBTOUV_PAVGB 1

@@ -623,11 +615,21 @@ void AR64ShuffleRow_C(const uint8_t* src_ar64,
     dst_ar64_16 += 4;
   }
 }
+// BT601 8 bit Y:
+// b  0.114 * 219 = 24.966 = 25
+// g  0.587 * 219 = 128.553 = 129
+// r  0.299 * 219 = 65.481 = 66
+// BT601 8 bit U:
+// b  0.875 * 128 = 112.0 = 112
+// g -0.5781 * 128 = -73.9968 = -74
+// r -0.2969 * 128 = -38.0032 = -38
+// BT601 8 bit V:
+// b -0.1406 * 128 = -17.9968 = -18
+// g -0.7344 * 128 = -94.0032 = -94
+// r  0.875 * 128 = 112.0 = 112

-#ifdef LIBYUV_RGB7
-// Old 7 bit math for Visual C
 static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
-  return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
+  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
 }
 static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
   return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
@@ -635,37 +637,10 @@ static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
 static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
   return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
 }
-#else
-// 8 bit
-// Intel SSE/AVX uses the following equivalent formula
-// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
-// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
-// 0x7e80) >> 8;
-
-static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
-  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
-}
-static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
-  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
-}
-static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
-  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
-}
-#endif

 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
-// ARM uses uint16. TODO: Make ARM use uint8 to allow dotproduct.
-#if !defined(LIBYUV_ARGBTOUV_PAVGB)
-static __inline int RGBxToU(uint16_t r, uint16_t g, uint16_t b) {
-  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
-}
-static __inline int RGBxToV(uint16_t r, uint16_t g, uint16_t b) {
-  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
-}
-#endif

 // ARGBToY_C and ARGBToUV_C
-// Intel version mimic SSE/AVX which does 2 pavgb
+// Intel version of UV mimic SSE/AVX which does 2 pavgb
 #if defined(LIBYUV_ARGBTOUV_PAVGB)
 #define MAKEROWY(NAME, R, G, B, BPP) \
 void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
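As a quick sanity check on the 8-bit fixed-point form that now stands alone in the two hunks above, here is a worked evaluation of the new RGBToY/RGBToU constants at the two extremes (a throwaway test of mine, not libyuv code; STATIC_CAST is replaced by a plain cast):

#include <stdint.h>
#include <stdio.h>

static uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  /* Coefficients from the BT601 comment block; 0x1080 = (16 << 8) + 0x80 rounding. */
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
static uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  /* 112 - 74 - 38 = 0, so grey lands on the 128 center; 0x8000 = 128 << 8. */
  return (uint8_t)((112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}

int main(void) {
  printf("black Y=%u U=%u\n", RGBToY(0, 0, 0), RGBToU(0, 0, 0)); /* 16 128 */
  printf("white Y=%u U=%u\n", RGBToY(255, 255, 255),
         RGBToU(255, 255, 255)); /* 235 128 */
  return 0;
}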
@@ -718,28 +693,28 @@ static __inline int RGBxToV(uint16_t r, uint16_t g, uint16_t b) {
     const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
     int x; \
     for (x = 0; x < width - 1; x += 2) { \
-      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 2) >> \
                    2; \
-      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 2) >> \
                    2; \
-      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 2) >> \
                    2; \
-      dst_u[0] = RGBxToU(ar, ag, ab); \
-      dst_v[0] = RGBxToV(ar, ag, ab); \
+      dst_u[0] = RGBToU(ar, ag, ab); \
+      dst_v[0] = RGBToV(ar, ag, ab); \
       src_rgb += BPP * 2; \
       src_rgb1 += BPP * 2; \
       dst_u += 1; \
       dst_v += 1; \
     } \
     if (width & 1) { \
-      uint16_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1; \
-      uint16_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1; \
-      uint16_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1; \
-      dst_u[0] = RGBxToU(ar, ag, ab); \
-      dst_v[0] = RGBxToV(ar, ag, ab); \
+      uint8_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1; \
+      uint8_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1; \
+      uint8_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1; \
+      dst_u[0] = RGBToU(ar, ag, ab); \
+      dst_v[0] = RGBToV(ar, ag, ab); \
     } \
   }
 #endif

@@ -752,32 +727,15 @@ MAKEROWY(RGB24, 2, 1, 0, 3)
 MAKEROWY(RAW, 0, 1, 2, 3)
 #undef MAKEROWY

-// JPeg uses a variation on BT.601-1 full range
+// JPeg uses BT.601-1 full range
 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b + center
-// BT.601 Mpeg range uses:
-// b 0.1016 * 255 = 25.908 = 25
-// g 0.5078 * 255 = 129.489 = 129
-// r 0.2578 * 255 = 65.739 = 66
-// JPeg 7 bit Y (deprecated)
-// b 0.11400 * 128 = 14.592 = 15
-// g 0.58700 * 128 = 75.136 = 75
-// r 0.29900 * 128 = 38.272 = 38
 // JPeg 8 bit Y:
 // b 0.11400 * 256 = 29.184 = 29
 // g 0.58700 * 256 = 150.272 = 150
 // r 0.29900 * 256 = 76.544 = 77
 // JPeg 8 bit U:
-// b  0.50000 * 255 = 127.5 = 127
-// g -0.33126 * 255 = -84.4713 = -84
-// r -0.16874 * 255 = -43.0287 = -43
-// JPeg 8 bit V:
-// b -0.08131 * 255 = -20.73405 = -20
-// g -0.41869 * 255 = -106.76595 = -107
-// r  0.50000 * 255 = 127.5 = 127
-// TODO: consider 256 for fixed point on UV
-// JPeg 8 bit U:
 // b  0.50000 * 256 = 128.0 = 128
 // g -0.33126 * 256 = -84.80256 = -85
 // r -0.16874 * 256 = -43.19744 = -43

@@ -786,32 +744,16 @@ MAKEROWY(RAW, 0, 1, 2, 3)
 // g -0.41869 * 256 = -107.18464 = -107
 // r  0.50000 * 256 = 128.0 = 128

-#ifdef LIBYUV_RGB7
-// Old 7 bit math for compatibility on unsupported platforms.
-static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
-  return (38 * r + 75 * g + 15 * b + 64) >> 7;
-}
-#else
 // 8 bit
 static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
   return (77 * r + 150 * g + 29 * b + 128) >> 8;
 }
-#endif

 static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
-  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
+  return (128 * b - 85 * g - 43 * r + 0x8000) >> 8;
 }
 static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
-  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
+  return (128 * r - 107 * g - 21 * b + 0x8000) >> 8;
 }
-#if !defined(LIBYUV_ARGBTOUV_PAVGB)
-static __inline uint8_t RGBxToUJ(uint16_t r, uint16_t g, uint16_t b) {
-  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
-}
-static __inline uint8_t RGBxToVJ(uint16_t r, uint16_t g, uint16_t b) {
-  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
-}
-#endif

 // ARGBToYJ_C and ARGBToUVJ_C
 // Intel version mimic SSE/AVX which does 2 pavgb

@@ -867,17 +809,17 @@ static __inline uint8_t RGBxToVJ(uint16_t r, uint16_t g, uint16_t b) {
     const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
     int x; \
     for (x = 0; x < width - 1; x += 2) { \
-      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
+      uint8_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 2) >> \
                    2; \
-      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
+      uint8_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 2) >> \
                    2; \
-      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
+      uint8_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 2) >> \
                    2; \
-      dst_u[0] = RGBxToUJ(ar, ag, ab); \
-      dst_v[0] = RGBxToVJ(ar, ag, ab); \
+      dst_u[0] = RGBToUJ(ar, ag, ab); \
+      dst_v[0] = RGBToVJ(ar, ag, ab); \
       src_rgb += BPP * 2; \
       src_rgb1 += BPP * 2; \
       dst_u += 1; \

@@ -887,8 +829,8 @@ static __inline uint8_t RGBxToVJ(uint16_t r, uint16_t g, uint16_t b) {
       uint16_t ab = (src_rgb[B] + src_rgb1[B] + 1) >> 1; \
       uint16_t ag = (src_rgb[G] + src_rgb1[G] + 1) >> 1; \
       uint16_t ar = (src_rgb[R] + src_rgb1[R] + 1) >> 1; \
-      dst_u[0] = RGBxToUJ(ar, ag, ab); \
-      dst_v[0] = RGBxToVJ(ar, ag, ab); \
+      dst_u[0] = RGBToUJ(ar, ag, ab); \
+      dst_v[0] = RGBToVJ(ar, ag, ab); \
     } \
   }

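The UV rows keep two averaging styles: the PAVGB-style path built from the AVGB(a, b) = ((a) + (b) + 1) >> 1 macro kept above, and the plain (b0 + b1 + b2 + b3 + 2) >> 2 box average that remains in the #else branches of the hunks below. A small stand-alone check (my own test harness; the pairing of AVGB calls is illustrative) shows the two are not bit-identical, which is why the C code mirrors the SIMD's pavgb form on Intel instead of treating them as interchangeable:

#include <stdint.h>
#include <stdio.h>

#define AVGB(a, b) (((a) + (b) + 1) >> 1) /* same definition as the diff */

int main(void) {
  int diffs = 0;
  /* Compare a cascaded pavgb-style average with the exact rounded box filter
   * over all 2x2 blocks drawn from a small value range. */
  for (int a = 0; a < 8; ++a)
    for (int b = 0; b < 8; ++b)
      for (int c = 0; c < 8; ++c)
        for (int d = 0; d < 8; ++d) {
          int cascaded = AVGB(AVGB(a, b), AVGB(c, d));
          int boxed = (a + b + c + d + 2) >> 2;
          if (cascaded != boxed) ++diffs;
        }
  printf("2x2 blocks where the two averages disagree: %d of %d\n", diffs,
         8 * 8 * 8 * 8);
  return 0;
}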
@@ -993,11 +935,11 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
 #else
-    uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
-    uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
-    uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
+    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
+    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
+    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
 #endif

     src_rgb565 += 4;

@@ -1021,19 +963,11 @@ void RGB565ToUVRow_C(const uint8_t* src_rgb565,
     g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
     r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

-#if defined(LIBYUV_ARGBTOUV_PAVGB)
     uint8_t ab = AVGB(b0, b2);
     uint8_t ag = AVGB(g0, g2);
     uint8_t ar = AVGB(r0, r2);
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
-#else
-    uint16_t b = (b0 + b2 + 1) >> 1;
-    uint16_t g = (g0 + g2 + 1) >> 1;
-    uint16_t r = (r0 + r2 + 1) >> 1;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
-#endif
   }
 }

@@ -1082,11 +1016,11 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
 #else
-    uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
-    uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
-    uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
+    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
+    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
+    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
 #endif

     src_argb1555 += 4;

@@ -1111,19 +1045,11 @@ void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
     g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
     r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

-#if defined(LIBYUV_ARGBTOUV_PAVGB)
     uint8_t ab = AVGB(b0, b2);
     uint8_t ag = AVGB(g0, g2);
     uint8_t ar = AVGB(r0, r2);
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
-#else
-    uint16_t b = (b0 + b2 + 1) >> 1;
-    uint16_t g = (g0 + g2 + 1) >> 1;
-    uint16_t r = (r0 + r2 + 1) >> 1;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
-#endif
   }
 }

@@ -1168,11 +1094,11 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
 #else
-    uint16_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
-    uint16_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
-    uint16_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
+    uint8_t b = (b0 + b1 + b2 + b3 + 2) >> 2;
+    uint8_t g = (g0 + g1 + g2 + g3 + 2) >> 2;
+    uint8_t r = (r0 + r1 + r2 + r3 + 2) >> 2;
+    dst_u[0] = RGBToU(r, g, b);
+    dst_v[0] = RGBToV(r, g, b);
 #endif

     src_argb4444 += 4;

@@ -1195,19 +1121,11 @@ void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
     g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
     r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);

-#if defined(LIBYUV_ARGBTOUV_PAVGB)
     uint8_t ab = AVGB(b0, b2);
     uint8_t ag = AVGB(g0, g2);
     uint8_t ar = AVGB(r0, r2);
     dst_u[0] = RGBToU(ar, ag, ab);
     dst_v[0] = RGBToV(ar, ag, ab);
-#else
-    uint16_t b = (b0 + b2 + 1) >> 1;
-    uint16_t g = (g0 + g2 + 1) >> 1;
-    uint16_t r = (r0 + r2 + 1) >> 1;
-    dst_u[0] = RGBxToU(r, g, b);
-    dst_v[0] = RGBxToV(r, g, b);
-#endif
   }
 }

@@ -4036,7 +3954,7 @@ void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
 #define MAXTWIDTH 2048

 #if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
-    defined(HAS_I422TORGB565ROW_SSSE3)
+    defined(HAS_I422TORGB565ROW_SSSE3) && !defined(LIBYUV_ENABLE_ROWWIN)
 // row_win.cc has asm version, but GCC uses 2 step wrapper.
 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                            const uint8_t* src_u,
source/row_gcc.cc (1541 lines changed): file diff suppressed because it is too large.
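Per the commit message, the suppressed row_gcc.cc changes convert the per-format RGB-to-UV rows into matrix functions driven by a coefficient struct. The only piece of that interface visible on this page is the RgbUVConstants struct with int8_t kRGBToU[4] at the end of the NEON hunk below, plus the note that coefficients are expressed as negatives so that magnitude 128 fits in int8_t. A hypothetical sketch of how such a parameterized UV kernel could look (the struct layout beyond kRGBToU and the function itself are assumptions, not the library's actual code):

#include <stdint.h>
#include <stdio.h>

/* Coefficients stored negated: +128 does not fit in int8_t, but -128 does. */
struct RgbUVConstants {
  int8_t kRGBToU[4]; /* -B, -G, -R, 0 for the U row (assumed layout) */
  int8_t kRGBToV[4]; /* -B, -G, -R, 0 for the V row (assumed layout) */
};

static const struct RgbUVConstants kARGBJPEGUVConstants = {
    {-128, 85, 43, 0}, /* U = (128*B - 85*G - 43*R)/256 + 128, negated */
    {21, 107, -128, 0} /* V = (128*R - 107*G - 21*B)/256 + 128, negated */
};

/* One pixel of a hypothetical matrix-driven UV kernel (truncating shift). */
static void ArgbPixelToUV(const uint8_t argb[4],
                          const struct RgbUVConstants* c, uint8_t* u,
                          uint8_t* v) {
  int b = argb[0], g = argb[1], r = argb[2];
  *u = (uint8_t)((-(c->kRGBToU[0] * b + c->kRGBToU[1] * g + c->kRGBToU[2] * r) +
                  0x8000) >> 8);
  *v = (uint8_t)((-(c->kRGBToV[0] * b + c->kRGBToV[1] * g + c->kRGBToV[2] * r) +
                  0x8000) >> 8);
}

int main(void) {
  const uint8_t blue[4] = {255, 0, 0, 255}; /* B, G, R, A */
  uint8_t u, v;
  ArgbPixelToUV(blue, &kARGBJPEGUVConstants, &u, &v);
  printf("blue -> U=%u V=%u\n", u, v); /* U=255 V=107, matching RGBToUJ/VJ */
  return 0;
}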
@ -143,7 +143,8 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV444
|
"1: \n" //
|
||||||
|
READYUV444
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@ -165,7 +166,8 @@ void I444ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV444
|
"1: \n" //
|
||||||
|
READYUV444
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
|
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@ -188,7 +190,8 @@ void I422ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@ -211,7 +214,8 @@ void I444AlphaToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV444
|
"1: \n" //
|
||||||
|
READYUV444
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vld1.8 {d6}, [%[src_a]]! \n"
|
"vld1.8 {d6}, [%[src_a]]! \n"
|
||||||
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
||||||
@ -236,7 +240,8 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vld1.8 {d6}, [%[src_a]]! \n"
|
"vld1.8 {d6}, [%[src_a]]! \n"
|
||||||
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
||||||
@ -261,9 +266,10 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
STORERGBA "bgt 1b \n"
|
STORERGBA "bgt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_u] "+r"(src_u), // %[src_u]
|
[src_u] "+r"(src_u), // %[src_u]
|
||||||
[src_v] "+r"(src_v), // %[src_v]
|
[src_v] "+r"(src_v), // %[src_v]
|
||||||
@ -283,7 +289,8 @@ void I422ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
|
"vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@ -313,7 +320,8 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
ARGBTORGB565
|
ARGBTORGB565
|
||||||
"vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
|
"vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
|
||||||
@ -345,7 +353,8 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
|
||||||
"vmov.u8 d6, #0xff \n" ARGBTOARGB1555
|
"vmov.u8 d6, #0xff \n" ARGBTOARGB1555
|
||||||
"vst1.8 {q3}, [%[dst_argb1555]]! \n" // store 8 pixels RGB1555.
|
"vst1.8 {q3}, [%[dst_argb1555]]! \n" // store 8 pixels RGB1555.
|
||||||
@ -379,7 +388,8 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"vmov.u8 d7, #0x0f \n" // vbic bits to clear
|
"vmov.u8 d7, #0x0f \n" // vbic bits to clear
|
||||||
"1: \n" READYUV422 YUVTORGB RGBTORGB8
|
"1: \n" //
|
||||||
|
READYUV422 YUVTORGB RGBTORGB8
|
||||||
"subs %[width], %[width], #8 \n" ARGBTOARGB4444
|
"subs %[width], %[width], #8 \n" ARGBTOARGB4444
|
||||||
"vst1.8 {q0}, [%[dst_argb4444]]! \n" // store 8 pixels
|
"vst1.8 {q0}, [%[dst_argb4444]]! \n" // store 8 pixels
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@ -400,7 +410,8 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"vmov.u8 d6, #255 \n"
|
"vmov.u8 d6, #255 \n"
|
||||||
"1: \n" READYUV400 YUVTORGB RGBTORGB8
|
"1: \n" //
|
||||||
|
READYUV400 YUVTORGB RGBTORGB8
|
||||||
"subs %[width], %[width], #8 \n"
|
"subs %[width], %[width], #8 \n"
|
||||||
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
"vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
@@ -437,7 +448,8 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READNV12 YUVTORGB RGBTORGB8
+"1: \n" //
+READNV12 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
 "bgt 1b \n"
@@ -458,7 +470,8 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READNV21 YUVTORGB RGBTORGB8
+"1: \n" //
+READNV21 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
 "bgt 1b \n"
@@ -479,7 +492,8 @@ void NV12ToRGB24Row_NEON(const uint8_t* src_y,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READNV12 YUVTORGB RGBTORGB8
+"1: \n" //
+READNV12 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
 "bgt 1b \n"
@@ -500,7 +514,8 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READNV21 YUVTORGB RGBTORGB8
+"1: \n" //
+READNV21 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst3.8 {d0, d2, d4}, [%[dst_rgb24]]! \n"
 "bgt 1b \n"
@@ -521,7 +536,8 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READNV12 YUVTORGB RGBTORGB8
+"1: \n" //
+READNV12 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n" ARGBTORGB565
 "vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
 "bgt 1b \n"
@@ -541,7 +557,8 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READYUY2 YUVTORGB RGBTORGB8
+"1: \n" //
+READYUY2 YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
 "bgt 1b \n"
@@ -560,7 +577,8 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
 asm volatile(
 YUVTORGB_SETUP
 "vmov.u8 d6, #255 \n"
-"1: \n" READUYVY YUVTORGB RGBTORGB8
+"1: \n" //
+READUYVY YUVTORGB RGBTORGB8
 "subs %[width], %[width], #8 \n"
 "vst4.8 {d0, d2, d4, d6}, [%[dst_argb]]! \n"
 "bgt 1b \n"
@@ -1819,6 +1837,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
 );
 }
 
+// Coefficients expressed as negatives to allow 128
 struct RgbUVConstants {
 int8_t kRGBToU[4];
 int8_t kRGBToV[4];
@@ -1832,18 +1851,14 @@ static void ARGBToUV444MatrixRow_NEON(
 int width,
 const struct RgbUVConstants* rgbuvconstants) {
 asm volatile(
-
 "vld1.8 {d0}, [%4] \n" // load rgbuvconstants
 "vdup.u8 d24, d0[0] \n" // UB 0.875 coefficient
 "vdup.u8 d25, d0[1] \n" // UG -0.5781 coefficient
 "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient
 "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient
 "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient
-"vneg.s8 d25, d25 \n"
-"vneg.s8 d26, d26 \n"
-"vneg.s8 d27, d27 \n"
-"vneg.s8 d28, d28 \n"
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vneg.s8 d24, d24 \n"
+"vmov.u16 q15, #0x8000 \n" // 128.0
 
 "1: \n"
 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
@@ -1856,8 +1871,8 @@ static void ARGBToUV444MatrixRow_NEON(
 "vmlsl.u8 q3, d1, d28 \n" // G
 "vmlsl.u8 q3, d0, d27 \n" // B
 
-"vaddhn.u16 d0, q2, q15 \n" // +128 -> unsigned
-"vaddhn.u16 d1, q3, q15 \n" // +128 -> unsigned
+"vaddhn.u16 d0, q2, q15 \n" // signed -> unsigned
+"vaddhn.u16 d1, q3, q15 \n"
 
 "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
 "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
@@ -1871,7 +1886,7 @@ static void ARGBToUV444MatrixRow_NEON(
 "q15");
 }
 
-// RGB to bt601 coefficients
+// RGB to BT601 coefficients
 // UB 0.875 coefficient = 112
 // UG -0.5781 coefficient = -74
 // UR -0.2969 coefficient = -38
@@ -1879,35 +1894,34 @@ static void ARGBToUV444MatrixRow_NEON(
 // VG -0.7344 coefficient = -94
 // VR 0.875 coefficient = 112
 
-static const struct RgbUVConstants kRgb24I601UVConstants = {{112, -74, -38, 0},
-                                                            {-18, -94, 112, 0}};
+static const struct RgbUVConstants kARGBI601UVConstants = {{-112, 74, 38, 0},
+                                                           {18, 94, -112, 0}};
 
 void ARGBToUV444Row_NEON(const uint8_t* src_argb,
 uint8_t* dst_u,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
-                          &kRgb24I601UVConstants);
+                          &kARGBI601UVConstants);
 }
 
 // RGB to JPEG coefficients
-// UB 0.500 coefficient = 127
-// UG -0.33126 coefficient = -84
+// UB 0.500 coefficient = 128
+// UG -0.33126 coefficient = -85
 // UR -0.16874 coefficient = -43
-// VB -0.08131 coefficient = -20
+// VB -0.08131 coefficient = -21
 // VG -0.41869 coefficient = -107
-// VR 0.500 coefficient = 127
+// VR 0.500 coefficient = 128
 
-static const struct RgbUVConstants kRgb24JPEGUVConstants = {
-    {127, -84, -43, 0},
-    {-20, -107, 127, 0}};
+static const struct RgbUVConstants kARGBJPEGUVConstants = {{-128, 85, 43, 0},
+                                                           {21, 107, -128, 0}};
 
 void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
 uint8_t* dst_u,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
-                          &kRgb24JPEGUVConstants);
+                          &kARGBJPEGUVConstants);
 }
 
 // clang-format off
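
For readers following the arithmetic: the UB/VR magnitude is now stored negated in the tables (-112 for BT.601, -128 for JPEG) so that the JPEG value 128 still fits in int8_t, and the row code flips its sign back (vneg/neg) before the multiplies. In scalar terms the conversion these constants drive is roughly the following sketch (illustrative only, with made-up helper names; not libyuv's C reference path):

#include <stdint.h>

// BT.601 studio-swing UV: 16-bit accumulator, bias 0x8000 (128.0),
// truncating shift (no +0.5 rounding).
static uint8_t RGBToU_I601(uint8_t r, uint8_t g, uint8_t b) {
  int32_t u = 0x8000 + 112 * b - 74 * g - 38 * r;
  return (uint8_t)(u >> 8);
}

static uint8_t RGBToV_I601(uint8_t r, uint8_t g, uint8_t b) {
  int32_t v = 0x8000 + 112 * r - 94 * g - 18 * b;
  return (uint8_t)(v >> 8);
}

The JPEG (full-range) variant swaps in the magnitudes 128/85/43 for U and 128/107/21 for V with the same bias and shift.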
@@ -1936,7 +1950,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
 "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
 "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
 "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vmov.u16 q15, #0x8000 \n" // 128.0
 "1: \n"
 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
@@ -1976,12 +1990,12 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
 int width) {
 asm volatile (
 "add %1, %0, %1 \n" // src_stride + src_argb
-"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
-"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
+"vmov.s16 q10, #128 \n" // UB/VR 0.500 coefficient
+"vmov.s16 q11, #85 \n" // UG -0.33126 coefficient
 "vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
-"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
+"vmov.s16 q13, #21 \n" // VB -0.08131 coefficient
 "vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vmov.u16 q15, #0x8000 \n" // 128.0
 "1: \n"
 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
@@ -2021,12 +2035,12 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
 int width) {
 asm volatile (
 "add %1, %0, %1 \n" // src_stride + src_argb
-"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
-"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
+"vmov.s16 q10, #128 \n" // UB/VR 0.500 coefficient
+"vmov.s16 q11, #85 \n" // UG -0.33126 coefficient
 "vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
-"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
+"vmov.s16 q13, #21 \n" // VB -0.08131 coefficient
 "vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vmov.u16 q15, #0x8000 \n" // 128.0
 "1: \n"
 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
@@ -2059,7 +2073,6 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
 );
 }
 
-// TODO(fbarchard): Subsample match C code.
 void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
 int src_stride_rgb24,
 uint8_t* dst_u,
@@ -2067,12 +2080,12 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
 int width) {
 asm volatile (
 "add %1, %0, %1 \n" // src_stride + src_rgb24
-"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
-"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
+"vmov.s16 q10, #128 \n" // UB/VR 0.500 coefficient
+"vmov.s16 q11, #85 \n" // UG -0.33126 coefficient
 "vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
-"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
+"vmov.s16 q13, #21 \n" // VB -0.08131 coefficient
 "vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vmov.u16 q15, #0x8000 \n" // 128.0
 "1: \n"
 "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
 "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
@@ -2105,7 +2118,6 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
 );
 }
 
-// TODO(fbarchard): Subsample match C code.
 void RAWToUVJRow_NEON(const uint8_t* src_raw,
 int src_stride_raw,
 uint8_t* dst_u,
@@ -2113,12 +2125,12 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
 int width) {
 asm volatile (
 "add %1, %0, %1 \n" // src_stride + src_raw
-"vmov.s16 q10, #127 \n" // UB / VR 0.500 coefficient
-"vmov.s16 q11, #84 \n" // UG -0.33126 coefficient
+"vmov.s16 q10, #128 \n" // UB/VR 0.500 coefficient
+"vmov.s16 q11, #85 \n" // UG -0.33126 coefficient
 "vmov.s16 q12, #43 \n" // UR -0.16874 coefficient
-"vmov.s16 q13, #20 \n" // VB -0.08131 coefficient
+"vmov.s16 q13, #21 \n" // VB -0.08131 coefficient
 "vmov.s16 q14, #107 \n" // VG -0.41869 coefficient
-"vmov.u16 q15, #0x8080 \n" // 128.5
+"vmov.u16 q15, #0x8000 \n" // 128.0
 "1: \n"
 "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
 "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
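
The new UVJ values are consistent with rounding the full-range JPEG coefficients at a fixed-point scale of 256: 0.500*256 = 128, 0.33126*256 = 84.8 -> 85, 0.16874*256 = 43.2 -> 43, 0.08131*256 = 20.8 -> 21 and 0.41869*256 = 107.2 -> 107, and each row stays balanced since 85 + 43 = 128 and 21 + 107 = 128. A quick standalone check of that arithmetic (not part of libyuv; build with e.g. cc check.c -lm):

#include <math.h>
#include <stdio.h>

int main(void) {
  const double k[] = {0.500, 0.33126, 0.16874, 0.08131, 0.41869};
  for (int i = 0; i < 5; ++i)
    printf("%.5f * 256 = %6.2f -> %ld\n", k[i], k[i] * 256, lround(k[i] * 256));
  return 0;  // prints 128, 85, 43, 21, 107
}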
@ -2163,7 +2175,7 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
|
||||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
|
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
|
||||||
@ -2208,7 +2220,7 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
|
||||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
|
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
|
||||||
@ -2253,7 +2265,7 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
|
||||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
|
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
|
||||||
@ -2298,7 +2310,7 @@ void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
|
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
|
||||||
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
|
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
|
||||||
@ -2343,7 +2355,7 @@ void RAWToUVRow_NEON(const uint8_t* src_raw,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
|
"vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
|
||||||
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
|
"vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
|
||||||
@ -2389,7 +2401,7 @@ void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
|
"vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
|
||||||
"subs %4, %4, #16 \n" // 16 processed per loop.
|
"subs %4, %4, #16 \n" // 16 processed per loop.
|
||||||
@ -2454,7 +2466,7 @@ void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
|
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
|
||||||
"subs %4, %4, #16 \n" // 16 processed per loop.
|
"subs %4, %4, #16 \n" // 16 processed per loop.
|
||||||
@ -2519,7 +2531,7 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
|
|||||||
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
|
||||||
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
|
||||||
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
|
||||||
"vmov.u16 q15, #0x8080 \n" // 128.5
|
"vmov.u16 q15, #0x8000 \n" // 128.0
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
|
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels.
|
||||||
"subs %4, %4, #16 \n" // 16 processed per loop.
|
"subs %4, %4, #16 \n" // 16 processed per loop.
|
||||||
@@ -2748,10 +2760,11 @@ struct RgbConstants {
 // B * 0.1140 coefficient = 29
 // G * 0.5870 coefficient = 150
 // R * 0.2990 coefficient = 77
-// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
+// Add 0.5
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+                                                        0x0080};
 
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 0x0080};
 
 // RGB to BT.601 coefficients
 // B * 0.1016 coefficient = 25
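
For the Y path nothing numerical changes here: the JPEG weights already sum to 256 (29 + 150 + 77), and 0x0080 is the same +0.5 bias as the old literal 128, i.e. roughly Y = (29*B + 150*G + 77*R + 0x80) >> 8; the addend is only rewritten in hex.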
@ -242,7 +242,8 @@ void I444ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n" /* A */
|
"movi v19.8b, #255 \n" /* A */
|
||||||
"1: \n" READYUV444
|
"1: \n" //
|
||||||
|
READYUV444
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -264,7 +265,8 @@ void I444ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV444
|
"1: \n" //
|
||||||
|
READYUV444
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -289,11 +291,12 @@ void I210ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
uint16_t alpha = 0xc000;
|
uint16_t alpha = 0xc000;
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"dup v23.8h, %w[alpha] \n"
|
"dup v23.8h, %w[alpha] \n"
|
||||||
"1: \n" READYUV210
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
READYUV210
|
||||||
"b.gt 1b \n"
|
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
||||||
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_u] "+r"(src_u), // %[src_u]
|
[src_u] "+r"(src_u), // %[src_u]
|
||||||
[src_v] "+r"(src_v), // %[src_v]
|
[src_v] "+r"(src_v), // %[src_v]
|
||||||
@ -317,11 +320,12 @@ void I410ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
uint16_t alpha = 0xc000;
|
uint16_t alpha = 0xc000;
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"dup v23.8h, %w[alpha] \n"
|
"dup v23.8h, %w[alpha] \n"
|
||||||
"1: \n" READYUV410
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
READYUV410
|
||||||
"b.gt 1b \n"
|
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
||||||
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_u] "+r"(src_u), // %[src_u]
|
[src_u] "+r"(src_u), // %[src_u]
|
||||||
[src_v] "+r"(src_v), // %[src_v]
|
[src_v] "+r"(src_v), // %[src_v]
|
||||||
@ -344,11 +348,12 @@ void I212ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
const uint16_t limit = 0x3ff0;
|
const uint16_t limit = 0x3ff0;
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"1: \n" READYUV212
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
READYUV212
|
||||||
"b.gt 1b \n"
|
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
||||||
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_u] "+r"(src_u), // %[src_u]
|
[src_u] "+r"(src_u), // %[src_u]
|
||||||
[src_v] "+r"(src_v), // %[src_v]
|
[src_v] "+r"(src_v), // %[src_v]
|
||||||
@ -369,7 +374,8 @@ void I210ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV210
|
"1: \n" //
|
||||||
|
READYUV210
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -392,7 +398,8 @@ void I410ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV410
|
"1: \n" //
|
||||||
|
READYUV410
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -417,7 +424,8 @@ void I212ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV212
|
"1: \n" //
|
||||||
|
READYUV212
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -440,7 +448,8 @@ void I422ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n" /* A */
|
"movi v19.8b, #255 \n" /* A */
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -521,12 +530,13 @@ void P210ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
const uint16_t limit = 0x3ff0;
|
const uint16_t limit = 0x3ff0;
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"ldr q2, [%[kIndices]] \n"
|
"ldr q2, [%[kIndices]] \n"
|
||||||
"1: \n" READYUVP210
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
READYUVP210
|
||||||
"b.gt 1b \n"
|
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
||||||
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_uv] "+r"(src_uv), // %[src_uv]
|
[src_uv] "+r"(src_uv), // %[src_uv]
|
||||||
[dst_ar30] "+r"(dst_ar30), // %[dst_ar30]
|
[dst_ar30] "+r"(dst_ar30), // %[dst_ar30]
|
||||||
@ -547,12 +557,13 @@ void P410ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
asm volatile(YUVTORGB_SETUP
|
asm volatile(YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"ldr q2, [%[kIndices]] \n"
|
"ldr q2, [%[kIndices]] \n"
|
||||||
"1: \n" READYUVP410
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
READYUVP410
|
||||||
"b.gt 1b \n"
|
"subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30
|
||||||
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
[src_uv] "+r"(src_uv), // %[src_uv]
|
[src_uv] "+r"(src_uv), // %[src_uv]
|
||||||
[dst_ar30] "+r"(dst_ar30), // %[dst_ar30]
|
[dst_ar30] "+r"(dst_ar30), // %[dst_ar30]
|
||||||
@ -577,7 +588,8 @@ void I422ToAR30Row_NEON(const uint8_t* src_y,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB STOREAR30
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB STOREAR30
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
@ -704,7 +716,8 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v15.8b, #255 \n" /* A */
|
"movi v15.8b, #255 \n" /* A */
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"st4 {v15.8b,v16.8b,v17.8b,v18.8b}, [%[dst_rgba]], #32 \n"
|
"st4 {v15.8b,v16.8b,v17.8b,v18.8b}, [%[dst_rgba]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -726,7 +739,8 @@ void I422ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -767,7 +781,8 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8_TOP
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8_TOP
|
||||||
ARGBTORGB565_FROM_TOP
|
ARGBTORGB565_FROM_TOP
|
||||||
"st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8 pixels RGB565.
|
"st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8 pixels RGB565.
|
||||||
@ -838,7 +853,8 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"1: \n" READYUV422
|
"1: \n" //
|
||||||
|
READYUV422
|
||||||
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8
|
||||||
"movi v19.8b, #255 \n" ARGBTOARGB4444
|
"movi v19.8b, #255 \n" ARGBTOARGB4444
|
||||||
"st1 {v0.8h}, [%[dst_argb4444]], #16 \n" // store 8
|
"st1 {v0.8h}, [%[dst_argb4444]], #16 \n" // store 8
|
||||||
@ -867,7 +883,8 @@ void I400ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
"umlal2 v6.8h, v1.16b, v31.16b \n" /* DG */
|
"umlal2 v6.8h, v1.16b, v31.16b \n" /* DG */
|
||||||
"umull v4.8h, v1.8b, v28.8b \n" /* DB */
|
"umull v4.8h, v1.8b, v28.8b \n" /* DB */
|
||||||
"umull2 v5.8h, v1.16b, v29.16b \n" /* DR */
|
"umull2 v5.8h, v1.16b, v29.16b \n" /* DR */
|
||||||
"1: \n" READYUV400 I400TORGB
|
"1: \n" //
|
||||||
|
READYUV400 I400TORGB
|
||||||
"subs %w[width], %w[width], #8 \n" RGBTORGB8
|
"subs %w[width], %w[width], #8 \n" RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
@ -928,8 +945,8 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"ldr q2, [%[kNV12Table]] \n"
|
"ldr q2, [%[kNV12Table]] \n"
|
||||||
"1: \n" READNV12
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READNV12 "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
@ -951,8 +968,8 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"ldr q2, [%[kNV12Table]] \n"
|
"ldr q2, [%[kNV12Table]] \n"
|
||||||
"1: \n" READNV12
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READNV12 "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
@ -973,8 +990,8 @@ void NV12ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"ldr q2, [%[kNV12Table]] \n"
|
"ldr q2, [%[kNV12Table]] \n"
|
||||||
"1: \n" READNV12
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READNV12 "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
@ -995,8 +1012,8 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"ldr q2, [%[kNV12Table]] \n"
|
"ldr q2, [%[kNV12Table]] \n"
|
||||||
"1: \n" READNV12
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READNV12 "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
"st3 {v16.8b,v17.8b,v18.8b}, [%[dst_rgb24]], #24 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_y] "+r"(src_y), // %[src_y]
|
: [src_y] "+r"(src_y), // %[src_y]
|
||||||
@ -1017,7 +1034,8 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"ldr q2, [%[kNV12Table]] \n"
|
"ldr q2, [%[kNV12Table]] \n"
|
||||||
"1: \n" READNV12
|
"1: \n" //
|
||||||
|
READNV12
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8_TOP
|
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8_TOP
|
||||||
ARGBTORGB565_FROM_TOP
|
ARGBTORGB565_FROM_TOP
|
||||||
"st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8
|
"st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8
|
||||||
@ -1042,8 +1060,8 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"ldr q2, [%[kNV21InterleavedTable]] \n"
|
"ldr q2, [%[kNV21InterleavedTable]] \n"
|
||||||
"1: \n" READYUY2
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READYUY2 "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_yuy2] "+r"(src_yuy2), // %[src_yuy2]
|
: [src_yuy2] "+r"(src_yuy2), // %[src_yuy2]
|
||||||
@ -1063,8 +1081,8 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
|
|||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"ldr q2, [%[kNV12InterleavedTable]] \n"
|
"ldr q2, [%[kNV12InterleavedTable]] \n"
|
||||||
"1: \n" READUYVY
|
"1: \n" //
|
||||||
"subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
READUYVY "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8
|
||||||
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
"st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%[dst_argb]], #32 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src_uyvy] "+r"(src_uyvy), // %[src_yuy2]
|
: [src_uyvy] "+r"(src_uyvy), // %[src_yuy2]
|
||||||
@@ -2710,6 +2728,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
 );
 }
 
+// Coefficients expressed as negatives to allow 128
 struct RgbUVConstants {
 int8_t kRGBToU[4];
 int8_t kRGBToV[4];
@@ -2729,11 +2748,8 @@ static void ARGBToUV444MatrixRow_NEON(
 "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient
 "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient
 "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient
-"neg v25.16b, v25.16b \n"
-"neg v26.16b, v26.16b \n"
-"neg v27.16b, v27.16b \n"
-"neg v28.16b, v28.16b \n"
-"movi v29.16b, #0x80 \n" // 128.5
+"neg v24.16b, v24.16b \n"
+"movi v29.8h, #0x80, lsl #8 \n" // 128.0
 
 "1: \n"
 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
@@ -2747,8 +2763,8 @@ static void ARGBToUV444MatrixRow_NEON(
 "umlsl v3.8h, v1.8b, v28.8b \n" // G
 "umlsl v3.8h, v0.8b, v27.8b \n" // B
 
-"addhn v0.8b, v4.8h, v29.8h \n" // +128 -> unsigned
-"addhn v1.8b, v3.8h, v29.8h \n" // +128 -> unsigned
+"addhn v0.8b, v4.8h, v29.8h \n" // signed -> unsigned
+"addhn v1.8b, v3.8h, v29.8h \n"
 
 "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
 "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
@@ -2768,8 +2784,9 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
 uint8_t* dst_v,
 int width,
 const struct RgbUVConstants* rgbuvconstants) {
-asm("ld2r {v16.4s, v17.4s}, [%[rgbuvconstants]] \n"
-"movi v29.16b, #0x80 \n" // 128.5
+asm volatile(
+"ld2r {v16.4s, v17.4s}, [%[rgbuvconstants]] \n"
+"movi v29.8h, #0x80, lsl #8 \n" // 128.0
 "1: \n"
 "ldp q0, q1, [%[src]], #32 \n"
 "subs %w[width], %w[width], #8 \n" // 8 processed per loop.
@@ -2784,8 +2801,8 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
 "prfm pldl1keep, [%[src], 448] \n"
 "uzp1 v0.8h, v2.8h, v3.8h \n"
 "uzp1 v1.8h, v4.8h, v5.8h \n"
-"addhn v0.8b, v0.8h, v29.8h \n" // +128 -> unsigned
-"addhn v1.8b, v1.8h, v29.8h \n" // +128 -> unsigned
+"subhn v0.8b, v29.8h, v0.8h \n" // -signed -> unsigned
+"subhn v1.8b, v29.8h, v1.8h \n"
 "str d0, [%[dst_u]], #8 \n" // store 8 pixels U.
 "str d1, [%[dst_v]], #8 \n" // store 8 pixels V.
 "b.gt 1b \n"
@@ -2798,7 +2815,7 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
 "v29");
 }
 
-// RGB to bt601 coefficients
+// RGB to BT601 coefficients
 // UB 0.875 coefficient = 112
 // UG -0.5781 coefficient = -74
 // UR -0.2969 coefficient = -38
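
A note on the subhn change above: usdot consumes the stored (negated) coefficients directly, so the accumulated dot products come out sign-flipped; narrowing with subhn against the 0x8000 bias gives (0x8000 - sum) >> 8, the same biased, truncating result the plain NEON rows produce with addhn after their explicit neg.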
@@ -2806,15 +2823,15 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(
 // VG -0.7344 coefficient = -94
 // VR 0.875 coefficient = 112
 
-static const struct RgbUVConstants kRgb24I601UVConstants = {{112, -74, -38, 0},
-                                                            {-18, -94, 112, 0}};
+static const struct RgbUVConstants kARGBI601UVConstants = {{-112, 74, 38, 0},
+                                                           {18, 94, -112, 0}};
 
 void ARGBToUV444Row_NEON(const uint8_t* src_argb,
 uint8_t* dst_u,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
-                          &kRgb24I601UVConstants);
+                          &kARGBI601UVConstants);
 }
 
 void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
@@ -2822,27 +2839,26 @@ void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width,
-                               &kRgb24I601UVConstants);
+                               &kARGBI601UVConstants);
 }
 
 // RGB to JPEG coefficients
-// UB 0.500 coefficient = 127
-// UG -0.33126 coefficient = -84
+// UB 0.500 coefficient = 128
+// UG -0.33126 coefficient = -85
 // UR -0.16874 coefficient = -43
-// VB -0.08131 coefficient = -20
+// VB -0.08131 coefficient = -21
 // VG -0.41869 coefficient = -107
-// VR 0.500 coefficient = 127
+// VR 0.500 coefficient = 128
 
-static const struct RgbUVConstants kRgb24JPEGUVConstants = {
-    {127, -84, -43, 0},
-    {-20, -107, 127, 0}};
+static const struct RgbUVConstants kARGBJPEGUVConstants = {{-128, 85, 43, 0},
+                                                           {21, 107, -128, 0}};
 
 void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
 uint8_t* dst_u,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
-                          &kRgb24JPEGUVConstants);
+                          &kARGBJPEGUVConstants);
 }
 
 void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
@@ -2850,16 +2866,16 @@ void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb,
 uint8_t* dst_v,
 int width) {
 ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width,
-                               &kRgb24JPEGUVConstants);
+                               &kARGBJPEGUVConstants);
 }
 
 #define RGBTOUV_SETUP_REG \
 "movi v20.8h, #112 \n" /* UB/VR coefficient (0.875) */ \
 "movi v21.8h, #74 \n" /* UG coefficient (-0.5781) */ \
 "movi v22.8h, #38 \n" /* UR coefficient (-0.2969) */ \
 "movi v23.8h, #18 \n" /* VB coefficient (-0.1406) */ \
 "movi v24.8h, #94 \n" /* VG coefficient (-0.7344) */ \
-"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
+"movi v25.8h, #0x80, lsl #8 \n" /* 128.0 (0x8000 in 16-bit) */
 
 // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
 // clang-format off
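
The movi ... #0x80, lsl #8 form writes 0x8000 into every 16-bit lane, i.e. a bias of exactly 128.0 in the halfword accumulators, whereas the old byte splat of 0x80 yielded 0x8080 (128.5); with the narrowing shift unchanged, this trades the implicit +0.5 rounding for truncation.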
@ -2925,12 +2941,12 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
|
|||||||
int width) {
|
int width) {
|
||||||
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
"movi v20.8h, #128 \n" // UB/VR coeff (0.500)
|
||||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
"movi v21.8h, #85 \n" // UG coeff (-0.33126)
|
||||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
"movi v23.8h, #21 \n" // VB coeff (-0.08131)
|
||||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit)
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
||||||
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
||||||
@ -2970,12 +2986,12 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
|
|||||||
int width) {
|
int width) {
|
||||||
const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
|
const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
"movi v20.8h, #128 \n" // UB/VR coeff (0.500)
|
||||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
"movi v21.8h, #85 \n" // UG coeff (-0.33126)
|
||||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
"movi v23.8h, #21 \n" // VB coeff (-0.08131)
|
||||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit)
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
|
||||||
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
||||||
@ -3015,12 +3031,12 @@ void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
|
|||||||
int width) {
|
int width) {
|
||||||
const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
|
const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
"movi v20.8h, #128 \n" // UB/VR coeff (0.500)
|
||||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
"movi v21.8h, #85 \n" // UG coeff (-0.33126)
|
||||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
"movi v23.8h, #21 \n" // VB coeff (-0.08131)
|
||||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit)
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
||||||
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
||||||
@ -3060,12 +3076,12 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw,
|
|||||||
int width) {
|
int width) {
|
||||||
const uint8_t* src_raw_1 = src_raw + src_stride_raw;
|
const uint8_t* src_raw_1 = src_raw + src_stride_raw;
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movi v20.8h, #127 \n" // UB/VR coeff (0.500)
|
"movi v20.8h, #128 \n" // UB/VR coeff (0.500)
|
||||||
"movi v21.8h, #84 \n" // UG coeff (-0.33126)
|
"movi v21.8h, #85 \n" // UG coeff (-0.33126)
|
||||||
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
"movi v22.8h, #43 \n" // UR coeff (-0.16874)
|
||||||
"movi v23.8h, #20 \n" // VB coeff (-0.08131)
|
"movi v23.8h, #21 \n" // VB coeff (-0.08131)
|
||||||
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
"movi v24.8h, #107 \n" // VG coeff (-0.41869)
|
||||||
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
|
"movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit)
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
"ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels.
|
||||||
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
"subs %w4, %w4, #16 \n" // 16 processed per loop.
|
||||||
@@ -3606,12 +3622,13 @@ static void ARGBToYMatrixRow_NEON_DotProd(
 // B * 0.1140 coefficient = 29
 // G * 0.5870 coefficient = 150
 // R * 0.2990 coefficient = 77
-// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
+// Add 0.5
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+                                                        0x0080};
 static const struct RgbConstants kRgb24JPEGDotProdConstants = {{0, 29, 150, 77},
-                                                               128};
+                                                               0x0080};
 
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 0x0080};
 
 // RGB to BT.601 coefficients
 // B * 0.1016 coefficient = 25
@@ -203,6 +203,15 @@ void NV21ToRGB24Row_SVE2(const uint8_t* src_y,
 // elements flipped to account for the interleaving nature of the widening
 // addition instructions.
 
+// RGB to BT601 coefficients
+// UB 0.875 coefficient = 112
+// UG -0.5781 coefficient = -74
+// UR -0.2969 coefficient = -38
+// VB -0.1406 coefficient = -18
+// VG -0.7344 coefficient = -94
+// VR 0.875 coefficient = 112
+
+// SVE constants are not negated
 static const int16_t kARGBToUVCoefficients[] = {
     // UB, -UR, -UG, 0, -VB, VR, -VG, 0
     112, -38, -74, 0, -18, 112, -94, 0,
@@ -223,14 +232,22 @@ static const int16_t kABGRToUVCoefficients[] = {
     -38, 112, -74, 0, 112, -18, -94, 0,
 };
 
+// RGB to JPEG coefficients
+// UB 0.500 coefficient = 128
+// UG -0.33126 coefficient = -85
+// UR -0.16874 coefficient = -43
+// VB -0.08131 coefficient = -21
+// VG -0.41869 coefficient = -107
+// VR 0.500 coefficient = 128
+
 static const int16_t kARGBToUVJCoefficients[] = {
     // UB, -UR, -UG, 0, -VB, VR, -VG, 0
-    127, -43, -84, 0, -20, 127, -107, 0,
+    128, -43, -85, 0, -21, 128, -107, 0,
 };
 
 static const int16_t kABGRToUVJCoefficients[] = {
     // -UR, UB, -UG, 0, VR, -VB, -VG, 0
-    -43, 127, -84, 0, 127, -20, -107, 0,
+    -43, 128, -85, 0, 128, -21, -107, 0,
 };
 
 static void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
@@ -245,8 +262,7 @@ static void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
 "ptrue p0.b \n"
 "ld1rd {z24.d}, p0/z, [%[uvconstants]] \n"
 "ld1rd {z25.d}, p0/z, [%[uvconstants], #8] \n"
-"mov z26.b, #0x80 \n"
-
+"mov z26.h, #0x8000 \n" // 128.0 (0x8000)
 "cntb %[vl] \n"
 "subs %w[width], %w[width], %w[vl] \n"
 "b.lt 2f \n"
@@ -12,7 +12,8 @@
 
 // This module is for Visual C 32/64 bit
 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
-    !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+    (defined(_M_IX86) || defined(_M_X64)) && \
+    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN))
 
 #if defined(_M_ARM64EC)
 #include <intrin.h>
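
With the revised guard, MSVC builds keep compiling this module exactly as before, while clang targeting Windows only picks it up when LIBYUV_ENABLE_ROWWIN is defined (for example via -DLIBYUV_ENABLE_ROWWIN, or /D LIBYUV_ENABLE_ROWWIN with clang-cl; an illustrative invocation, adjust to your build system).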
@@ -182,15 +183,52 @@ void I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
 
 // 32 bit
 #else  // defined(_M_X64)
-#ifdef HAS_ARGBTOYROW_SSSE3
 
-// Constants for ARGB.
-static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0,
-                              13, 65, 33, 0, 13, 65, 33, 0};
+#ifdef HAS_ARGBTOUVROW_SSSE3
+// 8 bit fixed point 0.5, for bias of UV.
+static const ulvec8 kBiasUV128 = {
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+
+// NV21 shuf 8 VU to 16 UV.
+static const lvec8 kShuffleNV21 = {
+    1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+    1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
+};
+
+// YUY2 shuf 16 Y to 32 Y.
+static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10,
+                                    10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4,
+                                    6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
+
+// YUY2 shuf 8 UV to 16 UV.
+static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9,
+                                     11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7,
+                                     5, 7, 9, 11, 9, 11, 13, 15, 13, 15};
+
+// UYVY shuf 16 Y to 32 Y.
+static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11,
+                                    11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5,
+                                    7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
+
+// UYVY shuf 8 UV to 16 UV.
+static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8,
+                                     10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6,
+                                     4, 6, 8, 10, 8, 10, 12, 14, 12, 14};
 
 // JPeg full range.
 static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0,
                                15, 75, 38, 0, 15, 75, 38, 0};
+#endif
+
+// vpermd for vphaddw + vpackuswb vpermd.
+static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
+
+// Constants for ARGB.
+static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0,
+                              13, 65, 33, 0, 13, 65, 33, 0};
 
 static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
                               112, -74, -38, 0, 112, -74, -38, 0};
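The kShuffle* tables moved above are index vectors for pshufb/vpshufb: within each 16-byte lane every destination byte picks the source byte named by its index, and an index with the high bit set writes zero. For a YUY2 byte stream Y0 U0 Y1 V0 Y2 U1 ..., kShuffleYUY2Y's {0, 0, 2, 2, ...} duplicates each Y, kShuffleYUY2UV's {1, 3, 1, 3, ...} repeats each U,V pair so both luma samples pick up the same chroma, and kShuffleNV21 swaps the interleaved VU bytes to UV while duplicating. A scalar model of that per-lane semantic, shown only to make the patterns readable (the helper below is not a libyuv function):

#include <stdint.h>

// Per-lane pshufb model: one 16-byte lane in, one 16-byte lane out.
static void PshufbLane_Sketch(const uint8_t src[16], const int8_t idx[16],
                              uint8_t dst[16]) {
  for (int i = 0; i < 16; ++i) {
    dst[i] = (idx[i] < 0) ? 0 : src[idx[i] & 15];  // high bit set -> zero
  }
}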
@@ -246,12 +284,6 @@ static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
 // 7 bit fixed point 0.5.
 static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64};
 
-// 8 bit fixed point 0.5, for bias of UV.
-static const ulvec8 kBiasUV128 = {
-    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
-
 // Shuffle table for converting RGB24 to ARGB.
 static const uvec8 kShuffleMaskRGB24ToARGB = {
     0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u};
@@ -287,32 +319,6 @@ static const uvec8 kShuffleMaskARGBToRAW = {
 static const uvec8 kShuffleMaskARGBToRGB24_0 = {
     0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u};
 
-// YUY2 shuf 16 Y to 32 Y.
-static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10,
-                                    10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4,
-                                    6, 6, 8, 8, 10, 10, 12, 12, 14, 14};
-
-// YUY2 shuf 8 UV to 16 UV.
-static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9,
-                                     11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7,
-                                     5, 7, 9, 11, 9, 11, 13, 15, 13, 15};
-
-// UYVY shuf 16 Y to 32 Y.
-static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11,
-                                    11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5,
-                                    7, 7, 9, 9, 11, 11, 13, 13, 15, 15};
-
-// UYVY shuf 8 UV to 16 UV.
-static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8,
-                                     10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6,
-                                     4, 6, 8, 10, 8, 10, 12, 14, 12, 14};
-
-// NV21 shuf 8 VU to 16 UV.
-static const lvec8 kShuffleNV21 = {
-    1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
-    1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
-};
-
 // Duplicates gray value 3 times and fills in alpha opaque.
 __declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y,
                                           uint8_t* dst_argb,
@@ -1240,8 +1246,6 @@ __declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb,
 }
 
 #ifdef HAS_ARGBTOYROW_AVX2
-// vpermd for vphaddw + vpackuswb vpermd.
-static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
 
 // Convert 32 ARGB pixels (128 bytes) to 32 Y values.
 __declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb,
@@ -1511,7 +1515,9 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
     mov        edx, [esp + 8 + 12]  // dst_u
     mov        edi, [esp + 8 + 16]  // dst_v
     mov        ecx, [esp + 8 + 20]  // width
+    // TODO: change biasuv to 0x8000
     movdqa     xmm5, xmmword ptr kBiasUV128
+    // TODO: use negated coefficients to allow -128
     movdqa     xmm6, xmmword ptr kARGBToVJ
     movdqa     xmm7, xmmword ptr kARGBToUJ
     sub        edi, edx  // stride from u to v
@@ -1552,10 +1558,12 @@ __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
     pmaddubsw  xmm3, xmm6
     phaddw     xmm0, xmm2
     phaddw     xmm1, xmm3
+    // TODO: negate by subtracting from 0x8000
     paddw      xmm0, xmm5  // +.5 rounding -> unsigned
     paddw      xmm1, xmm5
     psraw      xmm0, 8
     psraw      xmm1, 8
+    // TODO: packuswb
     packsswb   xmm0, xmm1
 
     // step 3 - store 8 U and 8 V values
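The TODOs describe the scheme this change adopts in the GCC/clang row paths: store the UV coefficients negated so the 0.5 term (128 at the 256 scale) fits a signed byte as -128, then recover the biased result by subtracting the accumulated sum from 0x8000 and truncating, at which point packuswb can replace the paddw/psraw/packsswb tail. A scalar model of that trick; it is hypothetical for this file, which still uses the rounding path shown above:

#include <stdint.h>

// Negated full-range U coefficients: -(128, -85, -43) = (-128, 85, 43),
// so every multiplier fits in int8 for pmaddubsw.
static uint8_t ToUJ_NegatedCoeffs_Sketch(uint8_t r, uint8_t g, uint8_t b) {
  int32_t neg_sum = -128 * b + 85 * g + 43 * r;    // always within [-32640, 32640]
  uint16_t biased = (uint16_t)(0x8000 - neg_sum);  // "negate by subtracting from 0x8000"
  return (uint8_t)(biased >> 8);                   // truncate; packuswb would store this
}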
@@ -1981,7 +1989,6 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb,
     ret
   }
 }
-#endif  // HAS_ARGBTOYROW_SSSE3
 
 // Read 16 UV from 444
 #define READYUV444_AVX2 \
@@ -17,7 +17,9 @@ extern "C" {
 #endif
 
 // This module is for GCC x86 and x64.
-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || defined(__i386__)) && \
+    !defined(LIBYUV_ENABLE_ROWWIN)
 
 // Offsets for source bytes 0 to 9
 static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
@@ -1761,25 +1763,25 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
 void ScaleAddRow_SSE2(const uint8_t* src_ptr,
                       uint16_t* dst_ptr,
                       int src_width) {
   asm volatile("pxor %%xmm5,%%xmm5 \n"
 
                // 16 pixel loop.
                LABELALIGN
                "1: \n"
                "movdqu (%0),%%xmm3 \n"
                "lea 0x10(%0),%0 \n"  // src_ptr += 16
                "movdqu (%1),%%xmm0 \n"
                "movdqu 0x10(%1),%%xmm1 \n"
                "movdqa %%xmm3,%%xmm2 \n"
                "punpcklbw %%xmm5,%%xmm2 \n"
                "punpckhbw %%xmm5,%%xmm3 \n"
                "paddusw %%xmm2,%%xmm0 \n"
                "paddusw %%xmm3,%%xmm1 \n"
                "movdqu %%xmm0,(%1) \n"
                "movdqu %%xmm1,0x10(%1) \n"
                "lea 0x20(%1),%1 \n"
                "sub $0x10,%2 \n"
                "jg 1b \n"
                : "+r"(src_ptr),   // %0
                  "+r"(dst_ptr),   // %1
                  "+r"(src_width)  // %2
@@ -1792,23 +1794,23 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr,
 void ScaleAddRow_AVX2(const uint8_t* src_ptr,
                       uint16_t* dst_ptr,
                       int src_width) {
   asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
 
                LABELALIGN
                "1: \n"
                "vmovdqu (%0),%%ymm3 \n"
                "lea 0x20(%0),%0 \n"  // src_ptr += 32
                "vpermq $0xd8,%%ymm3,%%ymm3 \n"
                "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
                "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
                "vpaddusw (%1),%%ymm2,%%ymm0 \n"
                "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n"
                "vmovdqu %%ymm0,(%1) \n"
                "vmovdqu %%ymm1,0x20(%1) \n"
                "lea 0x40(%1),%1 \n"
                "sub $0x20,%2 \n"
                "jg 1b \n"
                "vzeroupper \n"
                : "+r"(src_ptr),   // %0
                  "+r"(dst_ptr),   // %1
                  "+r"(src_width)  // %2
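Both ScaleAddRow variants accumulate a row of 8-bit source pixels into a 16-bit sum row: the bytes are widened against a zero register (punpcklbw/punpckhbw, vpunpcklbw/vpunpckhbw) and added with unsigned saturation (paddusw/vpaddusw), 16 or 32 pixels per iteration. A scalar sketch of the same operation for reading the asm; the function name is made up here and this is not libyuv's dispatch path:

#include <stdint.h>

static void ScaleAddRow_Sketch(const uint8_t* src_ptr, uint16_t* dst_ptr,
                               int src_width) {
  for (int x = 0; x < src_width; ++x) {
    uint32_t sum = (uint32_t)dst_ptr[x] + src_ptr[x];
    dst_ptr[x] = (uint16_t)(sum > 0xffff ? 0xffff : sum);  // paddusw saturates
  }
}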
@@ -17,8 +17,8 @@ extern "C" {
 #endif
 
 // This module is for 32 bit Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
-    !defined(__clang__) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_IX86) && \
+    (!defined(__clang__) || defined(LIBYUV_ENABLE_ROWWIN))
 
 // Offsets for source bytes 0 to 9
 static const uvec8 kShuf0 = {0, 1, 3, 4, 5, 7, 8, 9,
@@ -2076,7 +2076,7 @@ TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
   }
 
   uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
-  EXPECT_EQ(4157186353u, checksum);
+  EXPECT_EQ(223551344u, checksum);
 
   free_aligned_buffer_page_end(orig_rgb24);
   free_aligned_buffer_page_end(dest_j420);
@@ -2104,7 +2104,7 @@ TEST_F(LibYUVConvertTest, TestRGB24ToI420) {
   }
 
   uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381);
-  EXPECT_EQ(1526656597u, checksum);
+  EXPECT_EQ(4197774805u, checksum);
 
   free_aligned_buffer_page_end(orig_rgb24);
   free_aligned_buffer_page_end(dest_i420);
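The expected values change because HashDjb2 folds every byte of the converted buffer into one 32-bit hash, so altering the UV rounding perturbs the result completely rather than by a small delta. The hash is the classic djb2 recurrence seeded with 5381, as the test's call shows; a scalar sketch (libyuv also ships accelerated variants of the same function):

#include <stdint.h>

static uint32_t HashDjb2_Sketch(const uint8_t* src, uint64_t count,
                                uint32_t seed) {
  uint32_t hash = seed;  // the tests pass 5381
  for (uint64_t i = 0; i < count; ++i) {
    hash = hash * 33u + src[i];
  }
  return hash;
}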