From 8e7a62f22aa7571e4bb0a40629514bf242058723 Mon Sep 17 00:00:00 2001
From: Frank Barchard <fbarchard@google.com>
Date: Wed, 12 Aug 2015 17:01:24 -0700
Subject: [PATCH] I420AlphaToARGB conversion for planar YUV with Alpha to ARGB.

R=brucedawson@chromium.org, harryjin@google.com
BUG=libyuv:473

Review URL: https://webrtc-codereview.appspot.com/54829004.
---
 README.chromium                   |   2 +-
 include/libyuv/convert_argb.h     |   9 +++
 include/libyuv/planar_functions.h |   4 +-
 include/libyuv/version.h          |   2 +-
 source/convert_argb.cc            | 119 ++++++++++++++++++++++++++++++
 unit_test/convert_test.cc         |  87 ++++++++++++++++++++++
 6 files changed, 219 insertions(+), 4 deletions(-)

diff --git a/README.chromium b/README.chromium
index 6df39a4c1..d65c25712 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1463
+Version: 1464
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index 360c6d359..b61be93c5 100644
--- a/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
@@ -68,6 +68,15 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height);
 
+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
 // Convert I400 (grey) to ARGB.  Reverse of ARGBToI400.
 LIBYUV_API
 int I400ToARGB(const uint8* src_y, int src_stride_y,
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index ae994db89..868c1d1a4 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -281,13 +281,13 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height);
 
-// Copy ARGB to ARGB.
+// Copy Alpha channel of ARGB to alpha of ARGB.
 LIBYUV_API
 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb, int dst_stride_argb,
                   int width, int height);
 
-// Copy ARGB to ARGB.
+// Copy Y channel to Alpha of ARGB.
 LIBYUV_API
 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
                      uint8* dst_argb, int dst_stride_argb,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 0bab6d2e3..86856bdaf 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1463
+#define LIBYUV_VERSION 1464
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index 44756bc41..87434b172 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -257,6 +257,125 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
   return 0;
 }
 
+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width) = I422ToARGBRow_C;
+  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
+      ARGBCopyYToAlphaRow_C;
+  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
+                           int width) = ARGBAttenuateRow_C;
+  if (!src_y || !src_u || !src_v || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+  }
+#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
+  }
+#endif
+#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
+    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    ARGBCopyYToAlphaRow(src_a, dst_argb, width);
+    ARGBAttenuateRow(dst_argb, dst_argb, width);
+    dst_argb += dst_stride_argb;
+    src_a += src_stride_a;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
 // Convert I400 to ARGB.
 LIBYUV_API
 int I400ToARGB(const uint8* src_y, int src_stride_y,
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 8faacbe52..cc814fd44 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -514,6 +514,93 @@ TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1, 0, ARGB, 4)
 TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
 TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4)
 
+
+
+#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
+                       YALIGN, W1280, DIFF, N, NEG, OFF, FMT_C, BPP_C)         \
+TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) {                                 \
+  const int kWidth = ((W1280) > 0) ? (W1280) : 1;                              \
+  const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                     \
+  const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                        \
+  const int kSizeUV =                                                          \
+    SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y);              \
+  align_buffer_64(src_y, kWidth * kHeight + OFF);                              \
+  align_buffer_64(src_u, kSizeUV + OFF);                                       \
+  align_buffer_64(src_v, kSizeUV + OFF);                                       \
+  align_buffer_64(src_a, kWidth * kHeight + OFF);                              \
+  align_buffer_64(dst_argb_c, kStrideB * kHeight + OFF);                       \
+  align_buffer_64(dst_argb_opt, kStrideB * kHeight + OFF);                     \
+  srandom(time(NULL));                                                         \
+  for (int i = 0; i < kWidth * kHeight; ++i) {                                 \
+    src_y[i + OFF] = (random() & 0xff);                                        \
+    src_a[i + OFF] = (random() & 0xff);                                        \
+  }                                                                            \
+  for (int i = 0; i < kSizeUV; ++i) {                                          \
+    src_u[i + OFF] = (random() & 0xff);                                        \
+    src_v[i + OFF] = (random() & 0xff);                                        \
+  }                                                                            \
+  memset(dst_argb_c + OFF, 1, kStrideB * kHeight);                             \
+  memset(dst_argb_opt + OFF, 101, kStrideB * kHeight);                         \
+  MaskCpuFlags(disable_cpu_flags_);                                            \
+  FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth,                                   \
+                        src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),             \
+                        src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),             \
+                        src_a + OFF, kWidth,                                   \
+                        dst_argb_c + OFF, kStrideB,                            \
+                        kWidth, NEG kHeight);                                  \
+  MaskCpuFlags(-1);                                                            \
+  for (int i = 0; i < benchmark_iterations_; ++i) {                            \
+    FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth,                                 \
+                          src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),           \
+                          src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X),           \
+                          src_a + OFF, kWidth,                                 \
+                          dst_argb_opt + OFF, kStrideB,                        \
+                          kWidth, NEG kHeight);                                \
+  }                                                                            \
+  int max_diff = 0;                                                            \
+  /* Convert to ARGB so 565 is expanded to bytes that can be compared. */      \
+  align_buffer_64(dst_argb32_c, kWidth * BPP_C  * kHeight);                    \
+  align_buffer_64(dst_argb32_opt, kWidth * BPP_C  * kHeight);                  \
+  memset(dst_argb32_c, 2, kWidth * BPP_C  * kHeight);                          \
+  memset(dst_argb32_opt, 102, kWidth * BPP_C  * kHeight);                      \
+  FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB,                                 \
+                   dst_argb32_c, kWidth * BPP_C ,                              \
+                   kWidth, kHeight);                                           \
+  FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB,                               \
+                   dst_argb32_opt, kWidth * BPP_C ,                            \
+                   kWidth, kHeight);                                           \
+  for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) {                         \
+    int abs_diff =                                                             \
+        abs(static_cast<int>(dst_argb32_c[i]) -                                \
+            static_cast<int>(dst_argb32_opt[i]));                              \
+    if (abs_diff > max_diff) {                                                 \
+      max_diff = abs_diff;                                                     \
+    }                                                                          \
+  }                                                                            \
+  EXPECT_LE(max_diff, DIFF);                                                   \
+  free_aligned_buffer_64(src_y);                                               \
+  free_aligned_buffer_64(src_u);                                               \
+  free_aligned_buffer_64(src_v);                                               \
+  free_aligned_buffer_64(src_a);                                               \
+  free_aligned_buffer_64(dst_argb_c);                                          \
+  free_aligned_buffer_64(dst_argb_opt);                                        \
+  free_aligned_buffer_64(dst_argb32_c);                                        \
+  free_aligned_buffer_64(dst_argb32_opt);                                      \
+}
+
+#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,  \
+                       YALIGN, DIFF, FMT_C, BPP_C)                             \
+    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,     \
+        YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, FMT_C, BPP_C)          \
+    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,     \
+        YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, FMT_C, BPP_C)        \
+    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,     \
+        YALIGN, benchmark_width_, DIFF, _Invert, -, 0, FMT_C, BPP_C)           \
+    TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN,     \
+        YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
+
+TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2, ARGB, 4)
+
 #define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,       \
                          W1280, DIFF, N, NEG, OFF)                             \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) {                                 \