diff --git a/README.chromium b/README.chromium
index 2bb933b46..02d34b791 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 170
+Version: 171
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index 72f030608..907a703d5 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -133,6 +133,13 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
                uint8* dst_v, int dst_stride_v,
                int width, int height);
 
+// Convert V210 to I420.
+int V210ToI420(const uint8* src_v210, int src_stride_v210,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
 // Convert I420 to ARGB.
 int I420ToARGB(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 9bce25cea..bada3b274 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 170
+#define LIBYUV_VERSION 171
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
 
diff --git a/include/libyuv/video_common.h b/include/libyuv/video_common.h
index 75f0c7b4f..cb8237314 100644
--- a/include/libyuv/video_common.h
+++ b/include/libyuv/video_common.h
@@ -50,6 +50,7 @@ enum FourCC {
   FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
   FOURCC_M420 = FOURCC('M', '4', '2', '0'),
   FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
+  FOURCC_V210 = FOURCC('V', '2', '1', '0'),
   FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
   FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
   FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
diff --git a/source/convert.cc b/source/convert.cc
index 558757793..6d2dd2f14 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -361,7 +361,6 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
 //
READSAFE_ALWAYS - enables read ahead on systems without memory exceptions // or where buffers are padded by 64 bytes. - #define READSAFE_ODDHEIGHT static bool TestReadSafe(const uint8* src_yuy2, int src_stride_yuy2, @@ -374,7 +373,8 @@ static bool TestReadSafe(const uint8* src_yuy2, int src_stride_yuy2, #elif defined(READSAFE_NEVER) return false; #elif defined(READSAFE_ODDHEIGHT) - if (src_stride_yuy2 >= 0 && (height & 1) && width * bpp >= overread) { + if (!(width & 15) || + (src_stride_yuy2 >= 0 && (height & 1) && width * bpp >= overread)) { return true; } return false; @@ -404,14 +404,14 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, src_stride_yuy2 = -src_stride_yuy2; } void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int pix); void (*YUY2ToYRow)(const uint8* src_yuy2, - uint8* dst_y, int pix); + uint8* dst_y, int pix); YUY2ToYRow = YUY2ToYRow_C; YUY2ToUVRow = YUY2ToUVRow_C; #if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - if (TestReadSafe(src_yuy2, src_stride_yuy2, width, height, 2, 32)) { + if (width > 16) { YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; YUY2ToYRow = YUY2ToYRow_Any_SSE2; } @@ -437,8 +437,8 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, dst_v += dst_stride_v; } if (height & 1) { - YUY2ToUVRow_C(src_yuy2, 0, dst_u, dst_v, width); - YUY2ToYRow_C(src_yuy2, dst_y, width); + YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width); + YUY2ToYRow(src_yuy2, dst_y, width); } return 0; } @@ -456,14 +456,14 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, src_stride_uyvy = -src_stride_uyvy; } void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + uint8* dst_u, uint8* dst_v, int pix); void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int pix); + uint8* dst_y, int pix); UYVYToYRow = UYVYToYRow_C; UYVYToUVRow = UYVYToUVRow_C; #if defined(HAS_UYVYTOYROW_SSE2) if 
(TestCpuFlag(kCpuHasSSE2)) {
-    if (TestReadSafe(src_uyvy, src_stride_uyvy, width, height, 2, 32)) {
+    if (width > 16) {
       UYVYToUVRow = UYVYToUVRow_Any_SSE2;
       UYVYToYRow = UYVYToYRow_Any_SSE2;
     }
@@ -489,8 +489,113 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
     dst_v += dst_stride_v;
   }
   if (height & 1) {
-    UYVYToUVRow_C(src_uyvy, 0, dst_u, dst_v, width);
-    UYVYToYRow_C(src_uyvy, dst_y, width);
+    UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width);
+    UYVYToYRow(src_uyvy, dst_y, width);
+  }
+  return 0;
+}
+
+// gcc provided macros
+#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define LIBYUV_LITTLE_ENDIAN
+#endif
+// Visual C for x86 defines these
+#elif defined(_M_X64) || defined(_M_IX86)
+#define LIBYUV_LITTLE_ENDIAN
+#endif
+
+#ifdef LIBYUV_LITTLE_ENDIAN
+#define READWORD(p) (*((const uint32*) (p)))
+#else
+static uint32 READWORD(const uint8* p) {
+  return (uint32) p[0] |
+         ((uint32) (p[1]) << 8) |
+         ((uint32) (p[2]) << 16) |
+         ((uint32) (p[3]) << 24);
+}
+#endif
+
+// Converts 6 pixels (16 bytes to 12 bytes) per loop. Over converts remainder.
+// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
+void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
+  for (int x = 0; x < width; x += 6) {
+    uint32 w = READWORD(src_v210 + 0);  // three 10 bit samples per word
+    dst_uyvy[0] = (w >> 2) & 0xff;  // keep top 8 bits of each sample
+    dst_uyvy[1] = (w >> 12) & 0xff;
+    dst_uyvy[2] = (w >> 22) & 0xff;
+
+    w = READWORD(src_v210 + 4);
+    dst_uyvy[3] = (w >> 2) & 0xff;
+    dst_uyvy[4] = (w >> 12) & 0xff;
+    dst_uyvy[5] = (w >> 22) & 0xff;
+
+    w = READWORD(src_v210 + 8);
+    dst_uyvy[6] = (w >> 2) & 0xff;
+    dst_uyvy[7] = (w >> 12) & 0xff;
+    dst_uyvy[8] = (w >> 22) & 0xff;
+
+    w = READWORD(src_v210 + 12);
+    dst_uyvy[9] = (w >> 2) & 0xff;
+    dst_uyvy[10] = (w >> 12) & 0xff;
+    dst_uyvy[11] = (w >> 22) & 0xff;
+
+    dst_uyvy += 12;
+    src_v210 += 16;
+  }
+}
+
+// Convert V210 to I420.
+// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels.
+// Width is expected to be a multiple of 48 pixels.
+int V210ToI420(const uint8* src_v210, int src_stride_v210,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  if ((long long) width * 16 / 6 > kMaxStride) {  // row buffer is required
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    src_v210 = src_v210 + (height - 1) * src_stride_v210;
+    src_stride_v210 = -src_stride_v210;
+  }
+  SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
+  void (*V210ToUYVYRow)(const uint8* src_v210, uint8* dst_uyvy, int pix);
+  V210ToUYVYRow = V210ToUYVYRow_C;
+
+  void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
+                      uint8* dst_u, uint8* dst_v, int pix);
+  void (*UYVYToYRow)(const uint8* src_uyvy,
+                     uint8* dst_y, int pix);
+  UYVYToYRow = UYVYToYRow_C;
+  UYVYToUVRow = UYVYToUVRow_C;
+#if defined(HAS_UYVYTOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
+    UYVYToUVRow = UYVYToUVRow_SSE2;
+    UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
+    if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+      UYVYToYRow = UYVYToYRow_SSE2;
+    }
+  }
+#endif
+  for (int y = 0; y < height - 1; y += 2) {
+    V210ToUYVYRow(src_v210, row, width);
+    V210ToUYVYRow(src_v210 + src_stride_v210, row + kMaxStride, width);
+    UYVYToUVRow(row, kMaxStride, dst_u, dst_v, width);
+    UYVYToYRow(row, dst_y, width);
+    UYVYToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
+    src_v210 += src_stride_v210 * 2;
+    dst_y += dst_stride_y * 2;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {
+    V210ToUYVYRow(src_v210, row, width);
+    UYVYToUVRow(row, 0, dst_u, dst_v, width);
+    UYVYToYRow(row, dst_y, width);
   }
   return 0;
 }
@@ -513,10 +618,8 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
   ARGBToUVRow = ARGBToUVRow_C;
 #if defined(HAS_ARGBTOYROW_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
-    if (TestReadSafe(src_argb, src_stride_argb, width, height, 4, 64)) {
-      if (IS_ALIGNED(width, 2)) {
-
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - } + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; } if (IS_ALIGNED(width, 16)) { @@ -566,10 +669,8 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, BGRAToUVRow = BGRAToUVRow_C; #if defined(HAS_BGRATOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (TestReadSafe(src_bgra, src_stride_bgra, width, height, 4, 64)) { - if (IS_ALIGNED(width, 2)) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - } + if (width > 16) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; BGRAToYRow = BGRAToYRow_Any_SSSE3; } if (IS_ALIGNED(width, 16)) { @@ -595,8 +696,8 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, dst_v += dst_stride_v; } if (height & 1) { - BGRAToUVRow_C(src_bgra, 0, dst_u, dst_v, width); - BGRAToYRow_C(src_bgra, dst_y, width); + BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); + BGRAToYRow(src_bgra, dst_y, width); } return 0; } @@ -619,10 +720,8 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, ABGRToUVRow = ABGRToUVRow_C; #if defined(HAS_ABGRTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (TestReadSafe(src_abgr, src_stride_abgr, width, height, 4, 64)) { - if (IS_ALIGNED(width, 2)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - } + if (width > 16) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; ABGRToYRow = ABGRToYRow_Any_SSSE3; } if (IS_ALIGNED(width, 16)) { @@ -648,8 +747,8 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, dst_v += dst_stride_v; } if (height & 1) { - ABGRToUVRow_C(src_abgr, 0, dst_u, dst_v, width); - ABGRToYRow_C(src_abgr, dst_y, width); + ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); + ABGRToYRow(src_abgr, dst_y, width); } return 0; } @@ -686,7 +785,7 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (IS_ALIGNED(width, 2)) { + if (width > 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; } ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -751,7 
+850,7 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw, ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (IS_ALIGNED(width, 2)) { + if (width > 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; } ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -816,7 +915,7 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (IS_ALIGNED(width, 2)) { + if (width > 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; } ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -881,7 +980,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (IS_ALIGNED(width, 2)) { + if (width > 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; } ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -947,7 +1046,7 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, ARGBToUVRow = ARGBToUVRow_C; #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - if (IS_ALIGNED(width, 2)) { + if (width > 16) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; } ARGBToYRow = ARGBToYRow_Any_SSSE3; @@ -1026,6 +1125,15 @@ int ConvertToI420(const uint8* sample, size_t sample_size, v, v_stride, dst_width, inv_dst_height); break; + case FOURCC_V210: + // TODO(fbarchard): Confirm stride is 16 bytes per 6 pixels. 
+      src = sample + (aligned_src_width * crop_y + crop_x) * 16 / 6;
+      V210ToI420(src, aligned_src_width * 16 / 6,
+                 y, y_stride,
+                 u, u_stride,
+                 v, v_stride,
+                 dst_width, inv_dst_height);
+      break;
     case FOURCC_24BG:
       src = sample + (src_width * crop_y + crop_x) * 3;
       RGB24ToI420(src, src_width * 3,
diff --git a/source/row_common.cc b/source/row_common.cc
index 4fccc0593..3d6adf5ac 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -455,34 +455,36 @@ MAKEYUVANYRGB(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 2)
 
 #ifdef HAS_ARGBTOYROW_SSSE3
 
-#define MAKEANYTOYANY(NAMEANY, ARGBTOY) \
+// Y row for any width: converts the first (width - 16) pixels, then the last
+// 16 pixels separately. Assumes width >= 16 so (width - 16) is not negative.
+#define MAKEYANY(NAMEANY, ARGBTOY_SSE, BPP) \
     void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
-      SIMD_ALIGNED(uint8 row[kMaxStride]); \
-      ARGBTOY(src_argb, row, width); \
-      memcpy(dst_y, row, width); \
+      ARGBTOY_SSE(src_argb, dst_y, width - 16); \
+      ARGBTOY_SSE(src_argb + (width - 16) * BPP, dst_y + (width - 16), 16); \
     }
 
-MAKEANYTOYANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3)
-MAKEANYTOYANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3)
-MAKEANYTOYANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3)
-MAKEANYTOYANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2)
-MAKEANYTOYANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2)
+MAKEYANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4)
+MAKEYANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4)
+MAKEYANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4)
+MAKEYANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2)
+MAKEYANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2)
 
-#define MAKEANYTOUVANY(NAMEANY, ARGBTOUV) \
+// UV row for any width: the largest multiple of 16 pixels goes through the SSE
+// row function and the remaining (width & 15) pixels through the C function.
+#define MAKEUVANY(NAMEANY, ARGBTOUV_SSE, ARGBTOUV_C, BPP) \
     void NAMEANY(const uint8* src_argb0, int src_stride_argb, \
                  uint8* dst_u, uint8* dst_v, int width) { \
-      SIMD_ALIGNED(uint8 row[kMaxStride * 2]); \
-      ARGBTOUV(src_argb0, src_stride_argb, row, row + kMaxStride, width); \
-      int halfwidth = (width + 1) >> 1; \
-      memcpy(dst_u, row, halfwidth); \
-      memcpy(dst_v, row + kMaxStride, halfwidth); \
+      int n = width & ~15; \
+      ARGBTOUV_SSE(src_argb0, src_stride_argb, dst_u, dst_v, n); \
+      ARGBTOUV_C(src_argb0 + n * BPP, src_stride_argb, \
+                 dst_u + n / 2, dst_v + n / 2, width & 15); \
     }
 
-MAKEANYTOUVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3)
-MAKEANYTOUVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3)
-MAKEANYTOUVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3)
-MAKEANYTOUVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2)
-MAKEANYTOUVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2)
+MAKEUVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4)
+MAKEUVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4)
+MAKEUVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4)
+MAKEUVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2)
+MAKEUVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
 #endif
 
 #ifdef __cplusplus