diff --git a/README.chromium b/README.chromium index 578228da4..f49e57c87 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1770 +Version: 1772 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 124775ae9..d9cc5bd26 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -23,12 +23,14 @@ extern "C" { // Conversion matrix for YUV to RGB LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601 LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // JPeg +LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709 LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020 // Conversion matrix for YVU to BGR LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601 LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // JPeg +LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709 LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 @@ -37,6 +39,7 @@ LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 // TODO(fbarchard): Add macro for each Matrix function. #define kYuvI601ConstantsVU kYvuI601Constants #define kYuvJPEGConstantsVU kYvuJPEGConstants +#define kYuvF709ConstantsVU kYvuF709Constants #define kYuvH709ConstantsVU kYvuH709Constants #define kYuv2020ConstantsVU kYvu2020Constants #define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ @@ -118,6 +121,32 @@ int J420ToABGR(const uint8_t* src_y, int width, int height); +// Convert F420 to ARGB. BT.709 full range +LIBYUV_API +int F420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert F420 to ABGR. BT.709 full range +LIBYUV_API +int F420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + // Convert H420 to ARGB. LIBYUV_API int H420ToARGB(const uint8_t* src_y, diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 8d868b954..d1e55fb9a 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -201,14 +201,28 @@ int I444Copy(const uint8_t* src_y, int height); // Copy NV12. Supports inverting. -int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, - int src_stride_uv, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_uv, int dst_stride_uv, int width, int height); +int NV12Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Copy NV21. Supports inverting. 
-int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, - int src_stride_vu, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_vu, int dst_stride_vu, int width, int height); +int NV21Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert YUY2 to I422. LIBYUV_API diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 3c632b3ab..37a340d45 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1770 +#define LIBYUV_VERSION 1772 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h index b9823d71d..6e408eda4 100644 --- a/include/libyuv/video_common.h +++ b/include/libyuv/video_common.h @@ -94,16 +94,21 @@ enum FourCC { FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc FOURCC_J400 = FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_F420 = FOURCC('F', '4', '2', '0'), // bt.709 full, unofficial fourcc + FOURCC_F422 = FOURCC('F', '4', '2', '2'), // bt.709 full, unofficial fourcc + FOURCC_F444 = FOURCC('F', '4', '4', '4'), // bt.709 full, unofficial fourcc FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc + FOURCC_F010 = FOURCC('F', '0', '1', '0'), // bt.709 full range 10 bit 420 FOURCC_H010 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 420 FOURCC_U010 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 420 - FOURCC_H210 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 422 - FOURCC_U210 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 422 + FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422 + FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422 + FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422 // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 4e8908c2e..31a05048c 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -350,9 +350,16 @@ int I420ToI400(const uint8_t* src_y, } // Copy NV12. Supports inverting. -int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, - int src_stride_uv, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_uv, int dst_stride_uv, int width, int height) { +int NV12Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -375,9 +382,16 @@ int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, } // Copy NV21. Supports inverting. 
-int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, - int src_stride_vu, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_vu, int dst_stride_vu, int width, int height) { +int NV21Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y, dst_stride_y, dst_vu, dst_stride_vu, width, height); } diff --git a/source/row_common.cc b/source/row_common.cc index 0eca001fa..09988fef0 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -1327,7 +1327,68 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { } } -// TODO(fbarchard): Unify these structures to be platform independent. +// Macros to create SIMD specific yuv to rgb conversion constants. + +#if defined(__aarch64__) +#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ + const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, \ + {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, \ + {UG, VG, UG, VG, UG, VG, UG, VG}, \ + {UG, VG, UG, VG, UG, VG, UG, VG}, \ + {BB, BG, BR, YGB, 0, 0, 0, 0}, \ + {0x0101 * YG, YG, 0, 0}}; \ + const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, \ + {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, \ + {VG, UG, VG, UG, VG, UG, VG, UG}, \ + {VG, UG, VG, UG, VG, UG, VG, UG}, \ + {BR, BG, BB, YGB, 0, 0, 0, 0}, \ + {0x0101 * YG, YG, 0, 0}}; + +#elif defined(__arm__) +#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ + const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ + {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {BB, BG, BR, YGB, 0, 0, 0, 0}, \ + {0x0101 * YG, YG, 0, 0}}; \ + const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ + {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, \ + {BR, BG, BB, YGB, 0, 0, 0, 0}, \ + {0x0101 * YG, YG, 0, 0}}; + +#else +#define MAKEYUVCONSTANTS(name, YG, YGB, UB, UG, VG, VR, BB, BG, BR) \ + const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = { \ + {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \ + UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \ + {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \ + UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \ + {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \ + 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \ + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ + {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, \ + YGB, YGB}}; \ + const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = { \ + {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, \ + VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, \ + {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, \ + VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, \ + {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, \ + 0, UB, 0, UB, 
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, \ + {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, \ + {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, \ + {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, \ + {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \ + {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, \ + YGB, YGB}}; +#endif + // TODO(fbarchard): Generate SIMD structures from float matrix. // BT.601 YUV to RGB reference @@ -1350,60 +1411,7 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -#if defined(__aarch64__) // 64 bit arm -const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#elif defined(__arm__) // 32 bit arm -const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, - {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, - {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#else -const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { - {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, - {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, - {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { - {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, - {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, - {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -#endif +MAKEYUVCONSTANTS(I601, YG, YGB, UB, UG, VG, VR, BB, BG, BR) #undef BB #undef BG @@ -1435,60 +1443,7 @@ const struct 
YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -#if defined(__aarch64__) -const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#elif defined(__arm__) -const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, - {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, - {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#else -const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { - {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, - {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, - {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { - {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, - {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, - {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -#endif +MAKEYUVCONSTANTS(JPEG, YG, YGB, UB, UG, VG, VR, BB, BG, BR) #undef BB #undef BG @@ -1504,7 +1459,6 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { // R = (Y - 16) * 1.164 - V * -1.793 // G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533 // B = (Y - 16) * 1.164 - U * -2.112 -// See also http://www.equasys.de/colorconversion.html // Y contribution to R,G,B. Scale and bias. 
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ @@ -1522,60 +1476,45 @@ const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -#if defined(__aarch64__) -const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#elif defined(__arm__) -const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, - {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, - {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#else -const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { - {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, - {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, - {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { - {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, - {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, - {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -#endif +MAKEYUVCONSTANTS(H709, YG, YGB, UB, UG, VG, VR, BB, BG, BR) + +#undef BB +#undef BG +#undef BR +#undef YGB +#undef UB +#undef UG +#undef VG +#undef VR +#undef YG + +// BT.709 full range YUV to RGB reference +// R = Y - V * -1.5748 +// G = Y - U * 0.18732 - V * 0.46812 +// B = Y - U * -1.8556 +// WR = 0.2126 +// WB = 0.0722 +// WR and WB given, the equations are: +// R = Y + (2 * (1 - WR)) * V; +// G = Y - ((2 * ((WR * (1 - WR) * V) + (WB * (1 - WB) * U))) / (1 - WB - WR)); +// B = Y + (2 * (1 - WB)) * U; + +// Y contribution to R,G,B. Scale and bias. 
(same as jpeg) +#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */ +#define YGB 32 /* 64 / 2 */ + +// U and V contributions to R,G,B. +#define UB -119 /* round(-1.8556 * 64) */ +#define UG 12 /* round(0.18732 * 64) */ +#define VG 30 /* round(0.46812 * 64) */ +#define VR -101 /* round(-1.5748 * 64) */ + +// Bias values to round, and subtract 128 from U and V. +#define BB (UB * 128 + YGB) +#define BG (UG * 128 + VG * 128 + YGB) +#define BR (VR * 128 + YGB) + +MAKEYUVCONSTANTS(F709, YG, YGB, UB, UG, VG, VR, BB, BG, BR) #undef BB #undef BG @@ -1608,60 +1547,7 @@ const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) -#if defined(__aarch64__) -const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = { - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {UG, VG, UG, VG, UG, VG, UG, VG}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = { - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {VG, UG, VG, UG, VG, UG, VG, UG}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#elif defined(__arm__) -const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = { - {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, - {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BB, BG, BR, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = { - {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, - {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, - {BR, BG, BB, YGB, 0, 0, 0, 0}, - {0x0101 * YG, YG, 0, 0}}; -#else -const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = { - {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, - UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, - {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, - {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, - 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = { - {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, - VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, - {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, - {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, - 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, - {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, - {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, - {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, - {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, - {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, - YGB}}; -#endif +MAKEYUVCONSTANTS(2020, YG, YGB, UB, UG, VG, VR, BB, BG, BR) #undef BB #undef BG @@ -1673,6 +1559,8 @@ const struct YuvConstants 
SIMD_ALIGNED(kYvu2020Constants) = { #undef VR #undef YG +#undef MAKEYUVCONSTANTS + // C reference code that mimics the YUV assembly. // Reads 8 bit YUV and leaves result as 16 bit. static __inline void YuvPixel(uint8_t y, @@ -1712,9 +1600,9 @@ static __inline void YuvPixel(uint8_t y, #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; - *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6); - *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6); - *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6); + *b = Clamp((int32_t)(y1 + -(u * ub) + bb) >> 6); + *g = Clamp((int32_t)(y1 + -(u * ug + v * vg) + bg) >> 6); + *r = Clamp((int32_t)(y1 + -(v * vr) + br) >> 6); } // Reads 8 bit YUV and leaves result as 16 bit. @@ -2016,6 +1904,40 @@ void I422ToAR30Row_C(const uint8_t* src_y, } } +#if !defined(LIBYUV_DISABLE_NEON) && \ + (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) +// C mimic assembly. +// TODO(fbarchard): Remove subsampling from Neon. +void I444AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width) { + int x; + for (x = 0; x < width - 1; x += 2) { + uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; + uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; + YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, + yuvconstants); + rgb_buf[3] = src_a[0]; + YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, + yuvconstants); + rgb_buf[7] = src_a[1]; + src_y += 2; + src_u += 2; + src_v += 2; + src_a += 2; + rgb_buf += 8; // Advance 2 pixels. + } + if (width & 1) { + YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, + rgb_buf + 2, yuvconstants); + rgb_buf[3] = src_a[0]; + } +} +#else void I444AlphaToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -2035,6 +1957,7 @@ void I444AlphaToARGBRow_C(const uint8_t* src_y, rgb_buf += 4; // Advance 1 pixel. 
} } +#endif void I422AlphaToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index bfffdce4b..992986e2d 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -1278,7 +1278,7 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) { "vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2( - ymm5) "vzeroupper \n" + ymm5) "vzeroupper \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -2161,15 +2161,15 @@ void OMITFP I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf, // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) - "sub %[u_buf],%[v_buf] \n" + "sub %[u_buf],%[v_buf] \n" LABELALIGN - "1: \n" + "1: \n" READYUVA444 YUVTORGB(yuvconstants) STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" + "subl $0x8,%[width] \n" + "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] @@ -2947,16 +2947,16 @@ void OMITFP I444AlphaToARGBRow_AVX2(const uint8_t* y_buf, // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) - "sub %[u_buf],%[v_buf] \n" + "sub %[u_buf],%[v_buf] \n" LABELALIGN - "1: \n" + "1: \n" READYUVA444_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 - "subl $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "subl $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] diff --git a/source/row_neon.cc b/source/row_neon.cc index f73f7da90..a17899bef 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -168,8 +168,8 @@ void I444AlphaToARGBRow_NEON(const uint8_t* src_y, asm volatile( YUVTORGB_SETUP "1: \n" READYUV444 YUVTORGB - "subs %5, %5, #8 \n" "vld1.8 {d23}, [%3]! \n" + "subs %5, %5, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%4]! \n" "bgt 1b \n" : "+r"(src_y), // %0 @@ -415,11 +415,11 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READNV12 YUVTORGB - "subs %3, %3, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" + "vmov.u8 d23, #255 \n" + "1: \n" READNV12 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_argb), // %2 @@ -438,11 +438,11 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READNV21 YUVTORGB - "subs %3, %3, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" + "vmov.u8 d23, #255 \n" + "1: \n" READNV21 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! \n" + "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_vu), // %1 "+r"(dst_argb), // %2 @@ -537,11 +537,11 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READYUY2 YUVTORGB - "subs %2, %2, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" + "vmov.u8 d23, #255 \n" + "1: \n" READYUY2 YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -558,11 +558,11 @@ void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READUYVY YUVTORGB - "subs %2, %2, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" - "bgt 1b \n" + "vmov.u8 d23, #255 \n" + "1: \n" READUYVY YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 4f33456e8..dd5e6dd24 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -595,7 +595,7 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READNV12 - "prfm pldl1keep, [%0, 448] \n" YUVTORGB( + "prfm pldl1keep, [%0, 448] \n" YUVTORGB( v22, v21, v20) ARGBTORGB565 "prfm pldl1keep, [%1, 256] \n" "subs %w3, %w3, #8 \n" diff --git a/source/row_win.cc b/source/row_win.cc index a8c3bad0e..951518926 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -28,25 +28,25 @@ extern "C" { #if defined(_M_X64) // Read 8 UV from 444 -#define READYUV444 \ - xmm0 = _mm_loadl_epi64((__m128i*)u_buf); \ - xmm1 = _mm_loadl_epi64((__m128i*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - u_buf += 8; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ +#define READYUV444 \ + xmm0 = _mm_loadl_epi64((__m128i*)u_buf); \ + xmm1 = _mm_loadl_epi64((__m128i*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + u_buf += 8; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ y_buf += 8; // Read 8 UV from 444, With 8 Alpha. -#define READYUVA444 \ - xmm0 = _mm_loadl_epi64((__m128i*)u_buf); \ - xmm1 = _mm_loadl_epi64((__m128i*)(u_buf + offset)); \ - xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ - u_buf += 8; \ - xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ - xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ - y_buf += 8; \ - xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ +#define READYUVA444 \ + xmm0 = _mm_loadl_epi64((__m128i*)u_buf); \ + xmm1 = _mm_loadl_epi64((__m128i*)(u_buf + offset)); \ + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ + u_buf += 8; \ + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ + y_buf += 8; \ + xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ a_buf += 8; // Read 4 UV from 422, upsample to 8 UV. diff --git a/source/scale_neon.cc b/source/scale_neon.cc index 572b4bfa9..20e5b9af6 100644 --- a/source/scale_neon.cc +++ b/source/scale_neon.cc @@ -991,20 +991,20 @@ void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr, (void)src_stride; asm volatile( "1: \n" - "vld1.16 {d0[0]}, [%0], %6 \n" - "vld1.16 {d0[1]}, [%1], %6 \n" - "vld1.16 {d0[2]}, [%2], %6 \n" - "vld1.16 {d0[3]}, [%3], %6 \n" - "subs %5, %5, #4 \n" // 4 pixels per loop. - "vst1.8 {d0}, [%4]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src1_ptr), // %1 - "+r"(src2_ptr), // %2 - "+r"(src3_ptr), // %3 - "+r"(dst_ptr), // %4 - "+r"(dst_width) // %5 - : "r"(src_stepx * 8) // %6 + "vld1.16 {d0[0]}, [%0], %6 \n" + "vld1.16 {d0[1]}, [%1], %6 \n" + "vld1.16 {d0[2]}, [%2], %6 \n" + "vld1.16 {d0[3]}, [%3], %6 \n" + "subs %5, %5, #4 \n" // 4 pixels per loop. + "vst1.8 {d0}, [%4]! 
\n" + "bgt 1b \n" + : "+r"(src_ptr), // %0 + "+r"(src1_ptr), // %1 + "+r"(src2_ptr), // %2 + "+r"(src3_ptr), // %3 + "+r"(dst_ptr), // %4 + "+r"(dst_width) // %5 + : "r"(src_stepx * 8) // %6 : "memory", "cc", "d0"); } diff --git a/source/scale_uv.cc b/source/scale_uv.cc index c57df5959..2235eebe8 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -305,7 +305,7 @@ static void ScaleUVDownEven(int src_width, ScaleUVRowDownEven = ScaleUVRowDownEven_NEON; } } -#endif// TODO(fbarchard): Enable Box filter +#endif // TODO(fbarchard): Enable Box filter #if defined(HAS_SCALEUVROWDOWNEVENBOX_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_NEON diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc index 842fd9944..952472143 100644 --- a/unit_test/color_test.cc +++ b/unit_test/color_test.cc @@ -208,7 +208,33 @@ static void YUVHToRGB(int y, int u, int v, int* r, int* g, int* b) { *r = orig_pixels[2]; } -static void YUVRec2020ToRGB(int y, int u, int v, int* r, int* g, int* b) { +#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) + +static void YUVFToRGB(int y, int u, int v, int* r, int* g, int* b) { + const int kWidth = 16; + const int kHeight = 1; + const int kPixels = kWidth * kHeight; + const int kHalfPixels = ((kWidth + 1) / 2) * ((kHeight + 1) / 2); + + SIMD_ALIGNED(uint8_t orig_y[16]); + SIMD_ALIGNED(uint8_t orig_u[8]); + SIMD_ALIGNED(uint8_t orig_v[8]); + SIMD_ALIGNED(uint8_t orig_pixels[16 * 4]); + memset(orig_y, y, kPixels); + memset(orig_u, u, kHalfPixels); + memset(orig_v, v, kHalfPixels); + + /* YUV converted to ARGB. */ + F422ToARGB(orig_y, kWidth, orig_u, (kWidth + 1) / 2, orig_v, (kWidth + 1) / 2, + orig_pixels, kWidth * 4, kWidth, kHeight); + + *b = orig_pixels[0]; + *g = orig_pixels[1]; + *r = orig_pixels[2]; +} + +static void YUVUToRGB(int y, int u, int v, int* r, int* g, int* b) { const int kWidth = 16; const int kHeight = 1; const int kPixels = kWidth * kHeight; @@ -401,13 +427,15 @@ static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.112); } +// BT.709 full range YUV to RGB reference +static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) { + *r = RoundToByte(y - (v - 128) * -1.5748); + *g = RoundToByte(y - (u - 128) * 0.18732 - (v - 128) * 0.46812); + *b = RoundToByte(y - (u - 128) * -1.8556); +} + // BT.2020 YUV to RGB reference -static void YUVRec2020ToRGBReference(int y, - int u, - int v, - int* r, - int* g, - int* b) { +static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) { *r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867); *g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 - (v - 128) * 0.65042); @@ -633,7 +661,7 @@ TEST_F(LibYUVColorTest, TestFullYUVH) { PrintHistogram(rh, gh, bh); } -TEST_F(LibYUVColorTest, TestFullYUVRec2020) { +TEST_F(LibYUVColorTest, TestFullYUVF) { int rh[256] = { 0, }; @@ -648,8 +676,37 @@ TEST_F(LibYUVColorTest, TestFullYUVRec2020) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); - YUVRec2020ToRGBReference(y, u, v, &r0, &g0, &b0); - YUVRec2020ToRGB(y, u, v, &r1, &g1, &b1); + YUVFToRGBReference(y, u, v, &r0, &g0, &b0); + YUVFToRGB(y, u, v, &r1, &g1, &b1); + EXPECT_NEAR(r0, r1, 5); + EXPECT_NEAR(g0, g1, 5); + EXPECT_NEAR(b0, b1, 5); + ++rh[r1 - r0 + 128]; + ++gh[g1 - g0 + 128]; + ++bh[b1 - b0 + 128]; + } + } + } + PrintHistogram(rh, 
gh, bh); +} + +TEST_F(LibYUVColorTest, TestFullYUVU) { + int rh[256] = { + 0, + }; + int gh[256] = { + 0, + }; + int bh[256] = { + 0, + }; + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { + for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { + int r0, g0, b0, r1, g1, b1; + int y = RANDOM256(y2); + YUVUToRGBReference(y, u, v, &r0, &g0, &b0); + YUVUToRGB(y, u, v, &r1, &g1, &b1); EXPECT_NEAR(r0, r1, ERROR_R); EXPECT_NEAR(g0, g1, ERROR_G); // TODO(crbug.com/libyuv/863): Reduce the errors in the B channel. diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index b06f93907..c4ee33b1d 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -546,6 +546,20 @@ TESTBIPLANARTOBP(NV12, 2, 2, NV12Mirror, 2, 2) TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2) TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) +// Provide matrix wrappers +#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F420ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I420ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F422ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F422ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I422ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) +#define F444ToABGR(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, e, f, c, d, g, h, &kYvuF709Constants, i, j) +#define F444ToARGB(a, b, c, d, e, f, g, h, i, j) \ + I444ToARGBMatrix(a, b, c, d, e, f, g, h, &kYuvF709Constants, i, j) + #define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \ @@ -611,6 +625,8 @@ TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(I420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(J420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(J420, 2, 2, ABGR, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ARGB, 4, 4, 1) +TESTPLANARTOB(F420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(H420, 2, 2, ARGB, 4, 4, 1) TESTPLANARTOB(H420, 2, 2, ABGR, 4, 4, 1) TESTPLANARTOB(U420, 2, 2, ARGB, 4, 4, 1) @@ -726,6 +742,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) #define J420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ l, m) +#define F420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F420AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) #define H420AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I420AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ l, m) @@ -744,6 +766,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) #define J422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ l, m) +#define F422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F422AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) #define H422AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I422AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ l, m) @@ -762,6 +790,12 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1) #define J444AlphaToABGR(a, b, c, d, e, 
f, g, h, i, j, k, l, m) \ I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvJPEGConstants, k, \ l, m) +#define F444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) +#define F444AlphaToABGR(a, b, c, d, e, f, g, h, i, j, k, l, m) \ + I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvF709Constants, k, \ + l, m) #define H444AlphaToARGB(a, b, c, d, e, f, g, h, i, j, k, l, m) \ I444AlphaToARGBMatrix(a, b, c, d, e, f, g, h, i, j, &kYuvH709Constants, k, \ l, m) @@ -2812,12 +2846,16 @@ TESTQPLANARTOE(J420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(J420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(H420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(H420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(U420Alpha, 2, 2, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(U420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(I422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(I422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(J422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(J422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) +TESTQPLANARTOE(F422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(H422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4) TESTQPLANARTOE(H422Alpha, 2, 1, ABGR, 1, 4, ARGB, 4) TESTQPLANARTOE(U422Alpha, 2, 1, ARGB, 1, 4, ABGR, 4)
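
A note on the new F709 (BT.709 full range) constants introduced in row_common.cc above: they follow directly from the WR/WB equations quoted in the patch comments. The following standalone sketch is illustrative only (it is not part of the patch and uses no libyuv API); it recomputes the rounded 6-bit fixed-point values and should print the same UB/UG/VG/VR/YG/YGB numbers that the MAKEYUVCONSTANTS(F709, ...) invocation receives.

```c
/* Illustrative derivation of the BT.709 full-range constants, assuming the
 * WR/WB reference equations given in the patch comments. Compile with -lm. */
#include <math.h>
#include <stdio.h>

int main(void) {
  const double WR = 0.2126, WB = 0.0722, WG = 1.0 - WR - WB;
  const double vr = 2.0 * (1.0 - WR);            /* 1.5748  */
  const double ub = 2.0 * (1.0 - WB);            /* 1.8556  */
  const double ug = 2.0 * WB * (1.0 - WB) / WG;  /* 0.18732 */
  const double vg = 2.0 * WR * (1.0 - WR) / WG;  /* 0.46812 */

  /* Scale by 64 and round, matching the round(x * 64) comments in the diff. */
  printf("UB %d UG %d VG %d VR %d\n",
         (int)round(-ub * 64), (int)round(ug * 64),
         (int)round(vg * 64), (int)round(-vr * 64));
  /* Full-range Y: unit gain, so YG = round(1 * 64 * 256 * 256 / 257). */
  printf("YG %d YGB %d\n", (int)round(1.0 * 64 * 256 * 256 / 257), 64 / 2);
  return 0;  /* Expected output: UB -119 UG 12 VG 30 VR -101 / YG 16320 YGB 32 */
}
```

These match the #define values in the F709 block (UB -119, UG 12, VG 30, VR -101, YG 16320, YGB 32), which is also what the new TestFullYUVF color test exercises against the float reference.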