Add Detile_16 planar function for 10 bit MT2T format

- Neon and SSE2 versions. - Any versions for odd widths. Benchmark on Pixel 2 little core, AArch32 build: C: TestDetilePlane_16 (1275 ms), TestDetilePlane (1203 ms); Neon: TestDetilePlane_16 (693 ms), TestDetilePlane (660 ms). Bug: b/258474032 Change-Id: Idbd09c5e9324e4deef5f1d54090d4b63cc7db812 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4031848 Reviewed-by: Wan-Teh Chang <wtc@google.com> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
2025-12-06 16:56:55 +08:00 · 2022-11-16 18:02:34 -08:00 · 2022-11-16 18:02:34 -08:00 · 2d2cee418a
commit 2d2cee418a
parent 6f21862f1b
11 changed files with 279 additions and 67 deletions
--- a/README.chromium
+++ b/README.chromium
@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1848
+Version: 1849
 License: BSD
 License File: LICENSE

--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@ -85,7 +85,7 @@ void SetPlane(uint8_t* dst_y,

 // Convert a plane of tiles of 16 x H to linear.
 LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
+int DetilePlane(const uint8_t* src_y,
                int src_stride_y,
                uint8_t* dst_y,
                int dst_stride_y,
@ -93,6 +93,16 @@ void DetilePlane(const uint8_t* src_y,
                int height,
                int tile_height);

+// Convert a plane of 16 bit tiles of 16 x H to linear.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+                   int src_stride_y,
+                   uint16_t* dst_y,
+                   int dst_stride_y,
+                   int width,
+                   int height,
+                   int tile_height);
+
 // Convert a UV plane of tiles of 16 x H into linear U and V planes.
 LIBYUV_API
 void DetileSplitUVPlane(const uint8_t* src_uv,
@ -106,6 +116,7 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
                        int tile_height);

 // Convert a Y and UV plane of tiles into interlaced YUY2.
+LIBYUV_API
 void DetileToYUY2(const uint8_t* src_y,
                  int src_stride_y,
                  const uint8_t* src_uv,
@ -382,6 +393,7 @@ int I210Copy(const uint16_t* src_y,
             int height);

 // Copy NV12. Supports inverting.
+LIBYUV_API
 int NV12Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_uv,
@ -394,6 +406,7 @@ int NV12Copy(const uint8_t* src_y,
             int height);

 // Copy NV21. Supports inverting.
+LIBYUV_API
 int NV21Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_vu,
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@ -290,6 +290,7 @@ extern "C" {
 #define HAS_CONVERT16TO8ROW_SSSE3
 #define HAS_CONVERT8TO16ROW_SSE2
 #define HAS_DETILEROW_SSE2
+#define HAS_DETILEROW_16_SSE2
 #define HAS_DETILESPLITUVROW_SSSE3
 #define HAS_DETILETOYUY2_SSE2
 #define HAS_HALFMERGEUVROW_SSSE3
@ -449,6 +450,7 @@ extern "C" {
 #define HAS_BYTETOFLOATROW_NEON
 #define HAS_CONVERT16TO8ROW_NEON
 #define HAS_COPYROW_NEON
+#define HAS_DETILEROW_16_NEON
 #define HAS_DETILEROW_NEON
 #define HAS_DETILESPLITUVROW_NEON
 #define HAS_DETILETOYUY2_NEON
@ -823,6 +825,7 @@ struct YuvConstants {

 #endif

+#define IS_POWEROFTWO(x) (!((x) & ((x) - 1)))  // Note: also true for x == 0.
 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))

 #define align_buffer_64(var, size)                                           \
@ -2012,7 +2015,6 @@ void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width);
-
 void DetileRow_NEON(const uint8_t* src,
                    ptrdiff_t src_tile_stride,
                    uint8_t* dst,
@ -2029,6 +2031,26 @@ void DetileRow_Any_SSE2(const uint8_t* src,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst,
                        int width);
+void DetileRow_16_C(const uint16_t* src,
+                    ptrdiff_t src_tile_stride,
+                    uint16_t* dst,
+                    int width);
+void DetileRow_16_NEON(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width);
+void DetileRow_16_Any_NEON(const uint16_t* src,
+                           ptrdiff_t src_tile_stride,
+                           uint16_t* dst,
+                           int width);
+void DetileRow_16_SSE2(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width);
+void DetileRow_16_Any_SSE2(const uint16_t* src,
+                           ptrdiff_t src_tile_stride,
+                           uint16_t* dst,
+                           int width);
 void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1848
+#define LIBYUV_VERSION 1849

 #endif  // INCLUDE_LIBYUV_VERSION_H_
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@ -385,6 +385,7 @@ int I420ToI400(const uint8_t* src_y,
 }

 // Copy NV12. Supports inverting.
+LIBYUV_API
 int NV12Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_uv,
@ -418,6 +419,7 @@ int NV12Copy(const uint8_t* src_y,
 }

 // Copy NV21. Supports inverting.
+LIBYUV_API
 int NV21Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_vu,
@ -916,9 +918,8 @@ int NV21ToNV12(const uint8_t* src_y,
 // tile_height is 16 or 32 for MM21.
 // src_stride_y is bytes per row of source ignoring tiling. e.g. 640
 // TODO: More detile row functions.
-
 LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
+int DetilePlane(const uint8_t* src_y,
                int src_stride_y,
                uint8_t* dst_y,
                int dst_stride_y,
@ -929,13 +930,10 @@ void DetilePlane(const uint8_t* src_y,
  int y;
  void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
                    int width) = DetileRow_C;
-  assert(src_stride_y >= 0);
-  assert(tile_height > 0);
-  assert(src_stride_y > 0);
-
-  if (width <= 0 || height == 0) {
-    return;
+  if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) {
+    return -1;
  }
+
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
@ -970,6 +968,63 @@ void DetilePlane(const uint8_t* src_y,
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
+  return 0;
+}
+
+// Convert a plane of 16 bit tiles of 16 x H to linear. Tile width is 16.
+// tile_height must be a positive power of 2; it is 16 or 32 for MT2T.
+// Returns 0 on success, -1 on invalid arguments. Negative height inverts.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+                   int src_stride_y,
+                   uint16_t* dst_y,
+                   int dst_stride_y,
+                   int width,
+                   int height,
+                   int tile_height) {
+  const ptrdiff_t src_tile_stride = 16 * tile_height;
+  int y;
+  void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
+                       uint16_t* dst, int width) = DetileRow_16_C;
+  if (!src_y || !dst_y || width <= 0 || height == 0 || tile_height <= 0 ||
+      !IS_POWEROFTWO(tile_height)) {  // IS_POWEROFTWO(0) is true.
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_stride_y = -dst_stride_y;
+  }
+
+#if defined(HAS_DETILEROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    DetileRow_16 = DetileRow_16_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow_16 = DetileRow_16_SSE2;
+    }
+  }
+#endif
+#if defined(HAS_DETILEROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    DetileRow_16 = DetileRow_16_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      DetileRow_16 = DetileRow_16_NEON;
+    }
+  }
+#endif
+
+  // Detile plane
+  for (y = 0; y < height; ++y) {
+    DetileRow_16(src_y, src_tile_stride, dst_y, width);
+    dst_y += dst_stride_y;
+    src_y += 16;  // Next row within the current row of tiles.
+    // Advance to next row of tiles.
+    if ((y & (tile_height - 1)) == (tile_height - 1)) {
+      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
+    }
+  }
+  return 0;
 }

 LIBYUV_API
--- a/source/row_any.cc
+++ b/source/row_any.cc
@ -2242,26 +2242,31 @@ ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
 #endif
 #undef ANY11S

-#define ANYDETILE(NAMEANY, ANY_SIMD, MASK)                                  \
-  void NAMEANY(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, \
-               int width) {                                                 \
-    SIMD_ALIGNED(uint8_t temp[16 * 2]);                                     \
-    memset(temp, 0, 16); /* for msan */                                     \
+#define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK)                           \
+  void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \
+    SIMD_ALIGNED(T temp[16 * 2]);                                            \
+    memset(temp, 0, 16 * BPP); /* for msan */                                \
    int r = width & MASK;                                                    \
    int n = width & ~MASK;                                                   \
    if (n > 0) {                                                             \
      ANY_SIMD(src, src_tile_stride, dst, n);                                \
    }                                                                        \
-    memcpy(temp, src + (n / 16) * src_tile_stride, r);                      \
+    memcpy(temp, src + (n / 16) * src_tile_stride, r * BPP);                 \
    ANY_SIMD(temp, src_tile_stride, temp + 16, MASK + 1);                    \
-    memcpy(dst + n, temp + 16, r);                                          \
+    memcpy(dst + n, temp + 16, r * BPP);                                     \
  }

 #ifdef HAS_DETILEROW_NEON
-ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, 15)
+ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15)
 #endif
 #ifdef HAS_DETILEROW_SSE2
-ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, 15)
+ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15)
+#endif
+#ifdef HAS_DETILEROW_16_NEON
+ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15)
+#endif
+#ifdef HAS_DETILEROW_16_SSE2
+ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15)
 #endif

 #define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK)                \
--- a/source/row_common.cc
+++ b/source/row_common.cc
@ -2748,6 +2748,21 @@ void DetileRow_C(const uint8_t* src,
  }
 }

+void DetileRow_16_C(const uint16_t* src,
+                    ptrdiff_t src_tile_stride,  // in uint16_t elements
+                    uint16_t* dst,
+                    int width) {
+  int x;
+  for (x = 0; x < width - 15; x += 16) {  // copy whole 16 pixel tile rows
+    memcpy(dst, src, 16 * sizeof(uint16_t));
+    dst += 16;
+    src += src_tile_stride;  // advance to the next tile
+  }
+  if (width & 15) {  // copy the remaining partial tile row for odd widths
+    memcpy(dst, src, (width & 15) * sizeof(uint16_t));
+  }
+}
+
 void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@ -5030,6 +5030,29 @@ void DetileRow_SSE2(const uint8_t* src,
 }
 #endif  // HAS_DETILEROW_SSE2

+#ifdef HAS_DETILEROW_16_SSE2
+void DetileRow_16_SSE2(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width) {
+  asm volatile(
+      "1:                                        \n"
+      "movdqu      (%0),%%xmm0                   \n"  // load 8 pixels
+      "movdqu      0x10(%0),%%xmm1               \n"  // load next 8 pixels
+      "lea         (%0,%3,2),%0                  \n"  // src += stride * 2 bytes
+      "movdqu      %%xmm0,(%1)                   \n"  // store 8 pixels
+      "movdqu      %%xmm1,0x10(%1)               \n"  // store next 8 pixels
+      "lea         0x20(%1),%1                   \n"  // dst += 32 bytes
+      "sub         $0x10,%2                      \n"  // 16 processed per loop
+      "jg          1b                            \n"
+      : "+r"(src),            // %0
+        "+r"(dst),            // %1
+        "+r"(width)           // %2
+      : "r"(src_tile_stride)  // %3
+      : "cc", "memory", "xmm0", "xmm1");
+}
+#endif  // HAS_DETILEROW_16_SSE2
+
 #ifdef HAS_DETILETOYUY2_SSE2
 // Read 16 Y, 8 UV, and write 8 YUYV.
 void DetileToYUY2_SSE2(const uint8_t* src_y,
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@ -622,6 +622,26 @@ void DetileRow_NEON(const uint8_t* src,
  );
 }

+// Reads 16 Y's of 16 bits each from a tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width) {
+  asm volatile(
+      "1:                                        \n"
+      "vld1.16     {q0, q1}, [%0], %3            \n"  // load 16 pixels
+      "subs        %2, %2, #16                   \n"  // 16 processed per loop
+      "pld         [%0, #3584]                   \n"  // prefetch 7 * 512 bytes
+      "vst1.16     {q0, q1}, [%1]!               \n"  // store 16 pixels
+      "bgt         1b                            \n"
+      : "+r"(src),                  // %0
+        "+r"(dst),                  // %1
+        "+r"(width)                 // %2
+      : "r"(src_tile_stride * 2)    // %3 tile stride in bytes
+      : "cc", "memory", "q0", "q1"  // Clobber List
+  );
+}
+
 // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
 void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                           ptrdiff_t src_tile_stride,
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@ -650,6 +650,26 @@ void DetileRow_NEON(const uint8_t* src,
  );
 }

+// Reads 16 Y's of 16 bits each from a tile and writes out 16 Y's.
+void DetileRow_16_NEON(const uint16_t* src,
+                       ptrdiff_t src_tile_stride,
+                       uint16_t* dst,
+                       int width) {
+  asm volatile(
+      "1:                                        \n"
+      "ld1         {v0.8h,v1.8h}, [%0], %3       \n"  // load 16 pixels
+      "subs        %w2, %w2, #16                 \n"  // 16 processed per loop
+      "prfm        pldl1keep, [%0, 3584]         \n"  // 7 tiles of 512b ahead
+      "st1         {v0.8h,v1.8h}, [%1], #32      \n"  // store 16 pixels
+      "b.gt        1b                            \n"
+      : "+r"(src),                  // %0
+        "+r"(dst),                  // %1
+        "+r"(width)                 // %2
+      : "r"(src_tile_stride * 2)    // %3 tile stride in bytes
+      : "cc", "memory", "v0", "v1"  // Clobber List
+  );
+}
+
 // Read 16 bytes of UV, detile, and write 8 bytes of U and 8 bytes of V.
 void DetileSplitUVRow_NEON(const uint8_t* src_uv,
                           ptrdiff_t src_tile_stride,
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@ -1638,29 +1638,29 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  int i, j;

  // orig is tiled.  Allocate enough memory for tiles.
-  int orig_width = (benchmark_width_ + 15) & ~15;
-  int orig_height = (benchmark_height_ + 15) & ~15;
-  int orig_plane_size = orig_width * orig_height;
+  int tile_width = (benchmark_width_ + 15) & ~15;
+  int tile_height = (benchmark_height_ + 15) & ~15;
+  int tile_plane_size = tile_width * tile_height;
  int y_plane_size = benchmark_width_ * benchmark_height_;
-  align_buffer_page_end(orig_y, orig_plane_size);
+  align_buffer_page_end(tile_y, tile_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

-  MemRandomize(orig_y, orig_plane_size);
+  MemRandomize(tile_y, tile_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (j = 0; j < benchmark_iterations_; j++) {
-    DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+    DetilePlane(tile_y, tile_width, dst_c, benchmark_width_, benchmark_width_,
                benchmark_height_, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (j = 0; j < benchmark_iterations_; j++) {
-    DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+    DetilePlane(tile_y, tile_width, dst_opt, benchmark_width_, benchmark_width_,
                benchmark_height_, 16);
  }

@ -1668,7 +1668,46 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

-  free_aligned_buffer_page_end(orig_y);
+  free_aligned_buffer_page_end(tile_y);
+  free_aligned_buffer_page_end(dst_c);
+  free_aligned_buffer_page_end(dst_opt);
+}
+
+TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
+  int i, j;
+
+  // tile_y is tiled.  Allocate enough memory for whole tiles.
+  int tile_width = (benchmark_width_ + 15) & ~15;
+  int tile_height = (benchmark_height_ + 15) & ~15;
+  int tile_plane_size = tile_width * tile_height * 2;  // 2 bytes per pixel
+  int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
+  align_buffer_page_end(tile_y, tile_plane_size);
+  align_buffer_page_end(dst_c, y_plane_size);
+  align_buffer_page_end(dst_opt, y_plane_size);
+
+  MemRandomize(tile_y, tile_plane_size);
+  memset(dst_c, 0, y_plane_size);
+  memset(dst_opt, 0, y_plane_size);
+
+  // Disable all optimizations.
+  MaskCpuFlags(disable_cpu_flags_);
+  for (j = 0; j < benchmark_iterations_; j++) {
+    DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_c,
+                   benchmark_width_, benchmark_width_, benchmark_height_, 16);
+  }
+
+  // Enable optimizations.
+  MaskCpuFlags(benchmark_cpu_info_);
+  for (j = 0; j < benchmark_iterations_; j++) {
+    DetilePlane_16((const uint16_t*)tile_y, tile_width, (uint16_t*)dst_opt,
+                   benchmark_width_, benchmark_width_, benchmark_height_, 16);
+  }
+
+  for (i = 0; i < y_plane_size; ++i) {  // C and optimized output must match
+    EXPECT_EQ(dst_c[i], dst_opt[i]);
+  }
+
+  free_aligned_buffer_page_end(tile_y);
   free_aligned_buffer_page_end(dst_c);
   free_aligned_buffer_page_end(dst_opt);
 }
@ -1678,33 +1717,33 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
  int i, j;

  // orig is tiled.  Allocate enough memory for tiles.
-  int orig_width = (benchmark_width_ + 15) & ~15;
-  int orig_height = (benchmark_height_ + 15) & ~15;
-  int orig_plane_size = orig_width * orig_height;
+  int tile_width = (benchmark_width_ + 15) & ~15;
+  int tile_height = (benchmark_height_ + 15) & ~15;
+  int tile_plane_size = tile_width * tile_height;
  int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
-  align_buffer_page_end(orig_uv, orig_plane_size);
-  align_buffer_page_end(detiled_uv, orig_plane_size);
+  align_buffer_page_end(tile_uv, tile_plane_size);
+  align_buffer_page_end(detiled_uv, tile_plane_size);
  align_buffer_page_end(dst_u_two_stage, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_two_stage, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

-  MemRandomize(orig_uv, orig_plane_size);
-  memset(detiled_uv, 0, orig_plane_size);
+  MemRandomize(tile_uv, tile_plane_size);
+  memset(detiled_uv, 0, tile_plane_size);
  memset(dst_u_two_stage, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_two_stage, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

-  DetileSplitUVPlane(orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2,
+  DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
                     dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
                     benchmark_height_, 16);

  // Benchmark 2 step conversion for comparison.
  for (j = 0; j < benchmark_iterations_; j++) {
-    DetilePlane(orig_uv, orig_width, detiled_uv, benchmark_width_,
+    DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
                benchmark_width_, benchmark_height_, 16);
-    SplitUVPlane(detiled_uv, orig_width, dst_u_two_stage,
+    SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
                 (benchmark_width_ + 1) / 2, dst_v_two_stage,
                 (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
                 benchmark_height_);
@ -1715,7 +1754,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
    EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
  }

-  free_aligned_buffer_page_end(orig_uv);
+  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(detiled_uv);
  free_aligned_buffer_page_end(dst_u_two_stage);
  free_aligned_buffer_page_end(dst_u_opt);
@ -1727,17 +1766,17 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  int i, j;

  // orig is tiled.  Allocate enough memory for tiles.
-  int orig_width = (benchmark_width_ + 15) & ~15;
-  int orig_height = (benchmark_height_ + 15) & ~15;
-  int orig_plane_size = orig_width * orig_height;
+  int tile_width = (benchmark_width_ + 15) & ~15;
+  int tile_height = (benchmark_height_ + 15) & ~15;
+  int tile_plane_size = tile_width * tile_height;
  int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
-  align_buffer_page_end(orig_uv, orig_plane_size);
+  align_buffer_page_end(tile_uv, tile_plane_size);
  align_buffer_page_end(dst_u_c, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_c, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

-  MemRandomize(orig_uv, orig_plane_size);
+  MemRandomize(tile_uv, tile_plane_size);
  memset(dst_u_c, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_c, 0, uv_plane_size);
@ -1746,7 +1785,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);

-  DetileSplitUVPlane(orig_uv, orig_width, dst_u_c, (benchmark_width_ + 1) / 2,
+  DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, (benchmark_width_ + 1) / 2,
                     dst_v_c, (benchmark_width_ + 1) / 2, benchmark_width_,
                     benchmark_height_, 16);

@ -1755,7 +1794,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {

  for (j = 0; j < benchmark_iterations_; j++) {
    DetileSplitUVPlane(
-        orig_uv, orig_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
+        tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2, dst_v_opt,
        (benchmark_width_ + 1) / 2, benchmark_width_, benchmark_height_, 16);
  }

@ -1764,7 +1803,7 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

-  free_aligned_buffer_page_end(orig_uv);
+  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);