NV12ToRGB565 rewritten. Row functions with SSSE3 and NEON. Stride by bytes (bug fix). Handle odd sizes. Support inversion.

BUG=none TEST=none Review URL: http://webrtc-codereview.appspot.com/344012 git-svn-id: http://libyuv.googlecode.com/svn/trunk@134 16f28f9a-4ce2-e073-06de-1de4eb20be90
2025-12-06 16:56:55 +08:00 · 2012-01-13 19:27:42 +00:00 · 2012-01-13 19:27:42 +00:00 · 1de81bca84
commit 1de81bca84
parent ccd6d9b2de
5 changed files with 69 additions and 77 deletions
--- a/README.chromium
+++ b/README.chromium
@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 133
+Version: 134
 License: BSD
 License File: LICENSE

--- a/include/libyuv/convert.h
+++ b/include/libyuv/convert.h
@ -122,11 +122,6 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_frame, int dst_stride_frame,
-                 int width, int height);
-
 // Convert camera sample to I420 with cropping, rotation and vertical flip.
 // "src_size" is needed to parse MJPG.
 // "dst_stride_y" number of bytes in a row of the dst_y plane.
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@ -79,6 +79,12 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

+// Convert NV12 to RGB565.  Also used for NV21.
+int NV12ToRGB565(const uint8* src_y, int src_stride_y,
+                 const uint8* src_uv, int src_stride_uv,
+                 uint8* dst_frame, int dst_stride_frame,
+                 int width, int height);
+
 // Copy to I400.  Source can be I420,422,444,400,NV12,NV21
 int I400Copy(const uint8* src_y, int src_stride_y,
             uint8* dst_y, int dst_stride_y,
--- a/source/convert.cc
+++ b/source/convert.cc
@ -318,77 +318,6 @@ loop0:
  return 0;
 }

-#if HAVE_NV12TORGB565  // Previous NV12 to RGB565 converter, deleted by this change.
-int NV12ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_frame, int dst_stride_frame,
-                 int width, int height) {
-  if (src_y == NULL || src_uv == NULL || dst_frame == NULL) {
-    return -1;  // Only failure path: a NULL plane pointer.
-  }
-
-  // Bi-Planar: Y plane followed by an interlaced U and V plane
-  const uint8* interlacedSrc = src_uv;
-  uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1);  // NOTE(review): derived from src_y; dst_frame is never written. Stride is applied in uint16 units.
-  uint16* out2 = out - dst_stride_frame;  // One stride before out.
-  int32 tmp_r, tmp_g, tmp_b;
-  const uint8 *y1,*y2;
-  y1 = src_y;
-  y2 = y1 + src_stride_y;  // y1 and y2 walk two consecutive Y rows.
-  int h, w;
-
-  for (h = ((height + 1) >> 1); h > 0; h--) {  // One iteration per pair of rows.
-    // 2 rows at a time, 2 y's at a time
-    for (w = 0; w < ((width + 1) >> 1); w++) {
-      // Vertical and horizontal sub-sampling
-      // 1. Convert to RGB888
-      // 2. Shift to adequate location (in the 16 bit word) - RGB 565
-
-      tmp_r = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);  // interlacedSrc[0] indexes the U tables, [1] the V tables.
-      tmp_g = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
-                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
-      tmp_b = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
-      out[0]  = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)  // Pack as 5 bits R, 6 bits G, 5 bits B (assumes Clip yields 0..255 - TODO confirm).
-                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
-
-      tmp_r = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]] + 128) >> 8);  // Second pixel of the first row, same UV pair.
-      tmp_g = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
-                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
-      tmp_b = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
-      out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
-                         & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
-
-      tmp_r = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);  // First pixel of the second row, same UV pair.
-      tmp_g = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
-                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
-      tmp_b = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
-      out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
-                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
-
-      tmp_r = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]  // Second pixel of the second row, same UV pair.
-                      + 128) >> 8);
-      tmp_g = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
-                      + mapVcg[interlacedSrc[1]] + 128) >> 8);
-      tmp_b = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
-      out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
-                          & 0xfc) << 3) + (Clip(tmp_b) >> 3);
-
-      y1 += 2;
-      y2 += 2;
-      out += 2;
-      out2 += 2;
-      interlacedSrc += 2;  // Advance one UV pair (2 bytes) per 2x2 pixel group.
-    }
-    y1 += 2 * src_stride_y - width;  // NOTE(review): the inner loop advanced 2 * ((width + 1) >> 1), which exceeds width when width is odd.
-    y2 += 2 * src_stride_y - width;
-    interlacedSrc += src_stride_uv - ((width + 1) >> 1);  // NOTE(review): the inner loop advanced 2 * ((width + 1) >> 1) bytes, but only half of that is subtracted here.
-    out -= 3 * dst_stride_frame + dst_stride_frame - width;  // Net step back of 4 * dst_stride_frame - width uint16 elements.
-    out2 -= 3 * dst_stride_frame + dst_stride_frame - width;
-  }
-  return 0;
-}
-#endif
-
 // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
 int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
                uint8* dst_frame, int dst_stride_frame,
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@ -1621,6 +1621,67 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
  return 0;
 }

+// Convert NV12 to RGB565. Each UV row is split once via SplitUV and reused for two Y rows.
+int NV12ToRGB565(const uint8* src_y, int src_stride_y,  // Y rows; src_stride_y is added to src_y after every row.
+                 const uint8* src_uv, int src_stride_uv,  // UV rows; src_stride_uv is added after every even row.
+                 uint8* dst_argb, int dst_stride_argb,  // Output rows; the header prototype names these dst_frame / dst_stride_frame.
+                 int width, int height) {  // Negative height inverts the output. Always returns 0.
+  // Negative height means invert the image: start at the last output row and step backwards.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  void (*FastConvertYUVToRGB565Row)(const uint8* y_buf,  // Row converter, selected below.
+                                    const uint8* u_buf,
+                                    const uint8* v_buf,
+                                    uint8* rgb_buf,
+                                    int width);
+#if defined(HAS_FASTCONVERTYUVTORGB565ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_NEON;
+  } else  // Otherwise falls into the braces after #endif.
+#elif defined(HAS_FASTCONVERTYUVTORGB565ROW_SSSE3)  // Because of #elif, this branch is not compiled when the NEON macro is defined.
+  if (TestCpuFlag(kCpuHasSSSE3) &&
+      IS_ALIGNED(width, 8) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {  // Checked after the inversion adjustment above.
+    FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_SSSE3;
+  } else
+#endif
+  {
+    FastConvertYUVToRGB565Row = FastConvertYUVToRGB565Row_C;  // Presumably the portable C row function - TODO confirm.
+  }
+  int halfwidth = (width + 1) >> 1;  // Count passed to SplitUV; rounds up for odd widths.
+  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);  // UV splitter, selected below.
+#if defined(HAS_SPLITUV_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
+    SplitUV = SplitUV_NEON;
+  } else
+#elif defined(HAS_SPLITUV_SSE2)  // As above, mutually exclusive with the NEON branch at compile time.
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(halfwidth, 16) &&
+      IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) {
+    SplitUV = SplitUV_SSE2;
+  } else
+#endif
+  {
+    SplitUV = SplitUV_C;
+  }
+  SIMD_ALIGNED(uint8 row[kMaxStride * 2]);  // Scratch rows: U at row, V at row + kMaxStride.
+
+  for (int y = 0; y < height; ++y) {
+    if ((y & 1) == 0) {  // Even rows fetch a new UV row; odd rows reuse the previous one.
+      // Copy a row of UV, split into the U and V halves of row.
+      SplitUV(src_uv, row, row + kMaxStride, halfwidth);  // NOTE(review): no visible check that halfwidth fits in kMaxStride.
+      src_uv += src_stride_uv;
+    }
+    FastConvertYUVToRGB565Row(src_y, row, row + kMaxStride, dst_argb, width);
+    dst_argb += dst_stride_argb;  // Stride is negative when the image is inverted.
+    src_y += src_stride_y;
+  }
+  return 0;  // No failure path; the pointers are not checked for NULL here.
+}
+
 // Convert I420 to RGB565.
 int I420ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_u, int src_stride_u,
@ -1663,6 +1724,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
  }
  return 0;
 }
+
 // Convert I420 to ARGB1555.
 int I420ToARGB1555(const uint8* src_y, int src_stride_y,
                 const uint8* src_u, int src_stride_u,