From 1de81bca841d556f83f2d0c4bc92a0a5cd943d1f Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Fri, 13 Jan 2012 19:27:42 +0000 Subject: [PATCH] NV12ToRGB565 rewritten. Row functions with SSSE3 and NEON. Stride by bytes (bug fix). Handle odd sizes. Support inversion. BUG=none TEST=none Review URL: http://webrtc-codereview.appspot.com/344012 git-svn-id: http://libyuv.googlecode.com/svn/trunk@134 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/convert.h | 5 --- include/libyuv/planar_functions.h | 6 +++ source/convert.cc | 71 ------------------------------- source/planar_functions.cc | 62 +++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 77 deletions(-) diff --git a/README.chromium b/README.chromium index 7e1df26af..9a6b84c58 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 133 +Version: 134 License: BSD License File: LICENSE diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 35663f0f5..9bd7ad4bf 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -122,11 +122,6 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame, uint8* dst_v, int dst_stride_v, int width, int height); -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - // Convert camera sample to I420 with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. // "dst_stride_y" number of bytes in a row of the dst_y plane. diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index 730697e6d..26e9eb275 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -79,6 +79,12 @@ int I400ToI420(const uint8* src_y, int src_stride_y, uint8* dst_v, int dst_stride_v, int width, int height); +// Convert NV12 to RGB565. Also used for NV21. +int NV12ToRGB565(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + // Copy to I400. Source can be I420,422,444,400,NV12,NV21 int I400Copy(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, diff --git a/source/convert.cc b/source/convert.cc index 7ff8f326c..dece0b392 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -318,77 +318,6 @@ loop0: return 0; } -#if HAVE_NV12TORGB565 -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_frame, int dst_stride_frame, - int width, int height) { - if (src_y == NULL || src_uv == NULL || dst_frame == NULL) { - return -1; - } - - // Bi-Planar: Y plane followed by an interlaced U and V plane - const uint8* interlacedSrc = src_uv; - uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1); - uint16* out2 = out - dst_stride_frame; - int32 tmp_r, tmp_g, tmp_b; - const uint8 *y1,*y2; - y1 = src_y; - y2 = y1 + src_stride_y; - int h, w; - - for (h = ((height + 1) >> 1); h > 0; h--) { - // 2 rows at a time, 2 y's at a time - for (w = 0; w < ((width + 1) >> 1); w++) { - // Vertical and horizontal sub-sampling - // 1. Convert to RGB888 - // 2. Shift to adequate location (in the 16 bit word) - RGB 565 - - tmp_r = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8); - tmp_g = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]] - + mapVcg[interlacedSrc[1]] + 128) >> 8); - tmp_b = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8); - out[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g) - & 0xfc) << 3) + (Clip(tmp_b) >> 3); - - tmp_r = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]] + 128) >> 8); - tmp_g = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]] - + mapVcg[interlacedSrc[1]] + 128) >> 8); - tmp_b = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8); - out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g) - & 0xfc) << 3) + (Clip(tmp_b ) >> 3); - - tmp_r = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8); - tmp_g = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]] - + mapVcg[interlacedSrc[1]] + 128) >> 8); - tmp_b = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8); - out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g) - & 0xfc) << 3) + (Clip(tmp_b) >> 3); - - tmp_r = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]] - + 128) >> 8); - tmp_g = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]] - + mapVcg[interlacedSrc[1]] + 128) >> 8); - tmp_b = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8); - out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g) - & 0xfc) << 3) + (Clip(tmp_b) >> 3); - - y1 += 2; - y2 += 2; - out += 2; - out2 += 2; - interlacedSrc += 2; - } - y1 += 2 * src_stride_y - width; - y2 += 2 * src_stride_y - width; - interlacedSrc += src_stride_uv - ((width + 1) >> 1); - out -= 3 * dst_stride_frame + dst_stride_frame - width; - out2 -= 3 * dst_stride_frame + dst_stride_frame - width; - } - return 0; -} -#endif - // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height int RGB24ToARGB(const uint8* src_frame, int src_stride_frame, uint8* dst_frame, int dst_stride_frame, diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 2f79cc72e..5a8f50aa8 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1621,6 +1621,67 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, return 0; } +// Convert NV12 to RGB565. +int NV12ToRGB565(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_argb, int dst_stride_argb, + int width, int height) { + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; + dst_stride_argb = -dst_stride_argb; + } + void (*FastConvertYUVToRGB565Row)(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { + FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON; + } else +#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && + IS_ALIGNED(width, 8) && + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { + FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3; + } else +#endif + { + FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C; + } + int halfwidth = (width + 1) >> 1; + void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +#if defined(HAS_SPLITUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { + SplitUV = SplitUV_NEON; + } else +#elif defined(HAS_SPLITUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) { + SplitUV = SplitUV_SSE2; + } else +#endif + { + SplitUV = SplitUV_C; + } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + + for (int y = 0; y < height; ++y) { + if ((y & 1) == 0) { + // Copy a row of UV. + SplitUV(src_uv, row, row + kMaxStride, halfwidth); + src_uv += src_stride_uv; + } + FastConvertYUVToRGB565Row(src_y, row, row + kMaxStride, dst_argb, width); + dst_argb += dst_stride_argb; + src_y += src_stride_y; + } + return 0; +} + // Convert I420 to RGB565. int I420ToRGB565(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, @@ -1663,6 +1724,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, } return 0; } + // Convert I420 to ARGB1555. int I420ToARGB1555(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u,