From 3de12ae1c6e162f469c1d384430e2bf8dea32f6f Mon Sep 17 00:00:00 2001 From: "frkoenig@google.com" Date: Wed, 19 Oct 2011 17:52:15 +0000 Subject: [PATCH] I420 and NV12 rotate functions. Consolidate rotate files. Add unit tests for I420 and NV12 rotate functions. Fix remaining pitch/stride references. Review URL: http://webrtc-codereview.appspot.com/239001 git-svn-id: http://libyuv.googlecode.com/svn/trunk@32 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- include/libyuv/general.h | 19 - include/libyuv/rotate.h | 50 ++ libyuv.gyp | 1 - source/general.cc | 63 -- source/rotate.cc | 381 +++++++-- source/rotate.h | 46 -- source/rotate_deinterleave.cc | 171 ---- source/rotate_deinterleave_neon.s | 310 -------- source/rotate_neon.s | 371 ++++++++- source/rotate_priv.h | 72 ++ unit_test/rotate_test.cc | 1204 ++++++++++++++++++++++++----- unit_test/unit_test.h | 5 +- 12 files changed, 1809 insertions(+), 884 deletions(-) create mode 100644 include/libyuv/rotate.h delete mode 100644 source/rotate.h delete mode 100644 source/rotate_deinterleave.cc delete mode 100644 source/rotate_deinterleave_neon.s create mode 100644 source/rotate_priv.h diff --git a/include/libyuv/general.h b/include/libyuv/general.h index 3cd9d3234..58943c866 100644 --- a/include/libyuv/general.h +++ b/include/libyuv/general.h @@ -20,14 +20,6 @@ namespace libyuv { -// Supported rotation -enum RotationMode { - kRotateNone = 0, - kRotateClockwise = 90, - kRotateCounterClockwise = 270, - kRotate180 = 180, -}; - // I420 mirror int I420Mirror(const uint8* src_yplane, int src_ystride, @@ -50,17 +42,6 @@ I420Crop(uint8* frame, int src_width, int src_height, int dst_width, int dst_height); -// Rotate I420 frame -int -I420Rotate(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int width, int height, - RotationMode mode); - } // 
namespace libyuv #endif // INCLUDE_LIBYUV_GENERAL_H_ diff --git a/include/libyuv/rotate.h b/include/libyuv/rotate.h new file mode 100644 index 000000000..43b166a4b --- /dev/null +++ b/include/libyuv/rotate.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_ROTATE_H_ +#define INCLUDE_LIBYUV_ROTATE_H_ + +#include "libyuv/basic_types.h" + +namespace libyuv { + +// Supported rotation +enum RotationMode { + kRotateNone = 0, + kRotateClockwise = 90, + kRotateCounterClockwise = 270, + kRotate180 = 180, +}; + +// Rotate I420 frame +int +I420Rotate(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height, + RotationMode mode); + +// Split a NV12 input buffer into Y, U, V buffers and +// then rotate the buffers. 
+int +NV12ToI420Rotate(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height, + RotationMode mode); + +} // namespace libyuv + +#endif // INCLUDE_LIBYUV_ROTATE_H_ diff --git a/libyuv.gyp b/libyuv.gyp index b8287e60b..d5abab73c 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -44,7 +44,6 @@ 'source/general.cc', 'source/planar_functions.cc', 'source/rotate.cc', - 'source/rotate_deinterleave.cc', 'source/row_table.cc', 'source/scale.cc', 'source/video_common.cc', diff --git a/source/general.cc b/source/general.cc index 27f97bdc4..9d39f9bfb 100644 --- a/source/general.cc +++ b/source/general.cc @@ -13,7 +13,6 @@ #include // memcpy(), memset() #include "libyuv/planar_functions.h" -#include "rotate.h" namespace libyuv { @@ -282,66 +281,4 @@ I420CropPad(const uint8* src_frame, int src_width, return 0; } -int -I420Rotate(const uint8* src_yplane, int src_ystride, - const uint8* src_uplane, int src_ustride, - const uint8* src_vplane, int src_vstride, - uint8* dst_yplane, int dst_ystride, - uint8* dst_uplane, int dst_ustride, - uint8* dst_vplane, int dst_vstride, - int width, int height, - RotationMode mode) { - switch (mode) { - case kRotateNone: - // copy frame - return I420Copy(src_yplane, src_ystride, - src_uplane, src_ustride, - src_vplane, src_vstride, - dst_yplane, dst_ystride, - dst_uplane, dst_ustride, - dst_vplane, dst_vstride, - width, height); - break; - case kRotateClockwise: - Rotate90(src_yplane, src_ystride, - dst_yplane, dst_ystride, - width, height); - Rotate90(src_uplane, src_ustride, - dst_uplane, dst_ustride, - width, height); - Rotate90(src_vplane, src_vstride, - dst_vplane, dst_vstride, - width, height); - return 0; - break; - case kRotateCounterClockwise: - Rotate270(src_yplane, src_ystride, - dst_yplane, dst_ystride, - width, height); - Rotate270(src_uplane, src_ustride, - dst_uplane, dst_ustride, - width, 
height); - Rotate270(src_vplane, src_vstride, - dst_vplane, dst_vstride, - width, height); - return 0; - break; - case kRotate180: - Rotate180(src_yplane, src_ystride, - dst_yplane, dst_ystride, - width, height); - Rotate180(src_uplane, src_ustride, - dst_uplane, dst_ustride, - width, height); - Rotate180(src_vplane, src_vstride, - dst_vplane, dst_vstride, - width, height); - return 0; - break; - default: - return -1; - break; - } -} - } // namespace libyuv diff --git a/source/rotate.cc b/source/rotate.cc index 7d2c512c8..56b0ba2ad 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -8,107 +8,135 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "rotate.h" +#include "libyuv/planar_functions.h" +#include "libyuv/rotate.h" +#include "rotate_priv.h" namespace libyuv { +typedef void (*reverse_uv_func)(const uint8*, uint8*, uint8*, int); typedef void (*reverse_func)(const uint8*, uint8*, int); -typedef void (*rotate_wx8func)(const uint8*, int, uint8*, int, int); -typedef void (*rotate_wxhfunc)(const uint8*, int, uint8*, int, int, int); +typedef void (*rotate_uv_wx8_func)(const uint8*, int, + uint8*, int, + uint8*, int, int); +typedef void (*rotate_uv_wxh_func)(const uint8*, int, + uint8*, int, + uint8*, int, int, int); +typedef void (*rotate_wx8_func)(const uint8*, int, uint8*, int, int); +typedef void (*rotate_wxh_func)(const uint8*, int, uint8*, int, int, int); #ifdef __ARM_NEON__ extern "C" { +void RestoreRegisters_NEON(unsigned long long *restore); +void SaveRegisters_NEON(unsigned long long *store); void ReverseLine_NEON(const uint8* src, uint8* dst, int width); -void Transpose_wx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); +void ReverseLineUV_NEON(const uint8* src, + uint8* dst_a, uint8* dst_b, + int width); +void TransposeWx8_NEON(const uint8* src, int src_stride, + uint8* dst, int dst_stride, int width); +void TransposeUVWx8_NEON(const uint8* src, int src_stride, + uint8* dst_a, int 
dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width); } // extern "C" #endif -static void Transpose_wx8_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int w) { +static void TransposeWx8_C(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int w) { int i, j; for (i = 0; i < w; ++i) for (j = 0; j < 8; ++j) dst[i * dst_stride + j] = src[j * src_stride + i]; } -static void Transpose_wxh_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +static void TransposeWxH_C(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height) { int i, j; for (i = 0; i < width; ++i) for (j = 0; j < height; ++j) dst[i * dst_stride + j] = src[j * src_stride + i]; } -void Transpose(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void TransposePlane(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height) { int i = height; - rotate_wx8func Transpose_wx8; - rotate_wxhfunc Transpose_wxh; + rotate_wx8_func TransposeWx8; + rotate_wxh_func TransposeWxH; // do processor detection here. 
#ifdef __ARM_NEON__ - Transpose_wx8 = Transpose_wx8_NEON; - Transpose_wxh = Transpose_wxh_C; + TransposeWx8 = TransposeWx8_NEON; + TransposeWxH = TransposeWxH_C; #else - Transpose_wx8 = Transpose_wx8_C; - Transpose_wxh = Transpose_wxh_C; + TransposeWx8 = TransposeWx8_C; + TransposeWxH = TransposeWxH_C; #endif // work across the source in 8x8 tiles - do { - Transpose_wx8(src, src_stride, dst, dst_stride, width); + while (i >= 8) { + TransposeWx8(src, src_stride, dst, dst_stride, width); - src += 8 * src_stride; - dst += 8; + src += 8 * src_stride; // go down 8 rows + dst += 8; // move over 8 columns i -= 8; - } while (i >= 8); + } -// TODO(frkoenig): Have wx4 and maybe wx2 - Transpose_wxh(src, src_stride, dst, dst_stride, width, i); + TransposeWxH(src, src_stride, dst, dst_stride, width, i); } -void Rotate90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - src += src_stride*(height-1); +void RotatePlane90(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height) { + // Rotate by 90 is a transpose with the source read + // from bottom to top. So set the source pointer to the end + // of the buffer and flip the sign of the source stride. + src += src_stride * (height - 1); src_stride = -src_stride; - Transpose(src, src_stride, dst, dst_stride, width, height); + TransposePlane(src, src_stride, dst, dst_stride, width, height); } -void Rotate270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - dst += dst_stride*(width-1); +void RotatePlane270(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height) { + // Rotate by 270 is a transpose with the destination written + // from bottom to top. So set the destination pointer to the end + // of the buffer and flip the sign of the destination stride. 
+ dst += dst_stride * (width - 1); dst_stride = -dst_stride; - Transpose(src, src_stride, dst, dst_stride, width, height); + TransposePlane(src, src_stride, dst, dst_stride, width, height); } void ReverseLine_C(const uint8* src, uint8* dst, int width) { int i; - for (i = 0; i < width; ++i) - dst[width-1 - i] = src[i]; + src += width; + for (i = 0; i < width; ++i) { + --src; + dst[i] = src[0]; + } } -void Rotate180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { +void RotatePlane180(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height) { int i; reverse_func ReverseLine; - // do processor detection here. + // TODO(frkoenig): do processor detection here. #ifdef __ARM_NEON__ ReverseLine = ReverseLine_NEON; #else ReverseLine = ReverseLine_C; #endif - dst += dst_stride*(height-1); + // Rotate by 180 is a mirror with the destination + // written in reverse. + dst += dst_stride * (height - 1); for (i = 0; i < height; ++i) { ReverseLine(src, dst, width); @@ -118,4 +146,269 @@ void Rotate180(const uint8* src, int src_stride, } } +static void TransposeUVWx8_C(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int w) { + int i, j; + for (i = 0; i < w * 2; i += 2) + for (j = 0; j < 8; ++j) { + dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; + dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; + } +} + +static void TransposeUVWxH_C(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int w, int h) { + int i, j; + for (i = 0; i < w*2; i += 2) + for (j = 0; j < h; ++j) { + dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; + dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; + } +} + +void TransposeUV(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height) { + 
int i = height; + rotate_uv_wx8_func TransposeWx8; + rotate_uv_wxh_func TransposeWxH; + + // do processor detection here. +#ifdef __ARM_NEON__ + unsigned long long store_reg[8]; + SaveRegisters_NEON(store_reg); + TransposeWx8 = TransposeUVWx8_NEON; + TransposeWxH = TransposeUVWxH_C; +#else + TransposeWx8 = TransposeUVWx8_C; + TransposeWxH = TransposeUVWxH_C; +#endif + + // work through the source in 8x8 tiles + while (i >= 8) { + TransposeWx8(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width); + + src += 8 * src_stride; // go down 8 rows + dst_a += 8; // move over 8 columns + dst_b += 8; // move over 8 columns + i -= 8; + } + + TransposeWxH(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width, i); + +#ifdef __ARM_NEON__ + RestoreRegisters_NEON(store_reg); +#endif +} + +void RotateUV90(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height) { + src += src_stride * (height - 1); + src_stride = -src_stride; + + TransposeUV(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width, height); +} + +void RotateUV270(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height) { + dst_a += dst_stride_a * (width - 1); + dst_b += dst_stride_b * (width - 1); + dst_stride_a = -dst_stride_a; + dst_stride_b = -dst_stride_b; + + TransposeUV(src, src_stride, + dst_a, dst_stride_a, + dst_b, dst_stride_b, + width, height); +} + +static void ReverseLineUV_C(const uint8* src, + uint8* dst_a, uint8* dst_b, + int width) { + int i; + src += width << 1; + for (i = 0; i < width; ++i) { + src -= 2; + dst_a[i] = src[0]; + dst_b[i] = src[1]; + } +} + +void RotateUV180(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height) { + int i; + reverse_uv_func ReverseLine; + + // TODO(frkoenig) : do processor detection here. 
+#ifdef __ARM_NEON__ + ReverseLine = ReverseLineUV_NEON; +#else + ReverseLine = ReverseLineUV_C; +#endif + + dst_a += dst_stride_a * (height - 1); + dst_b += dst_stride_b * (height - 1); + + for (i = 0; i < height; ++i) { + ReverseLine(src, dst_a, dst_b, width); + + src += src_stride; // down one line at a time + dst_a -= dst_stride_a; // nominally up one line at a time + dst_b -= dst_stride_b; // nominally up one line at a time + } +} + +int I420Rotate(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height, + RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + switch (mode) { + case kRotateNone: + // copy frame + return I420Copy(src_y, src_stride_y, + src_u, src_stride_u, + src_v, src_stride_v, + dst_y, dst_stride_y, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + width, height); + case kRotateClockwise: + RotatePlane90(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotatePlane90(src_u, src_stride_u, + dst_u, dst_stride_u, + halfwidth, halfheight); + RotatePlane90(src_v, src_stride_v, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + case kRotateCounterClockwise: + RotatePlane270(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotatePlane270(src_u, src_stride_u, + dst_u, dst_stride_u, + halfwidth, halfheight); + RotatePlane270(src_v, src_stride_v, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + case 
kRotate180: + RotatePlane180(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotatePlane180(src_u, src_stride_u, + dst_u, dst_stride_u, + halfwidth, halfheight); + RotatePlane180(src_v, src_stride_v, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + default: + break; + } + return -1; +} + +int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, + const uint8* src_uv, int src_stride_uv, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height, + RotationMode mode) { + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + + // Negative height means invert the image. + if (height < 0) { + height = -height; + halfheight = (height + 1) >> 1; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; + src_stride_y = -src_stride_y; + src_stride_uv = -src_stride_uv; + } + + switch (mode) { + case kRotateNone: + // copy frame + return NV12ToI420(src_y, src_uv, src_stride_y, + dst_y, dst_stride_y, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + width, height); + case kRotateClockwise: + RotatePlane90(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotateUV90(src_uv, src_stride_uv, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + case kRotateCounterClockwise: + RotatePlane270(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotateUV270(src_uv, src_stride_uv, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + case kRotate180: + RotatePlane180(src_y, src_stride_y, + dst_y, dst_stride_y, + width, height); + RotateUV180(src_uv, src_stride_uv, + dst_u, dst_stride_u, + dst_v, dst_stride_v, + halfwidth, halfheight); + return 0; + default: + break; + } + return -1; +} + } // namespace libyuv diff --git a/source/rotate.h b/source/rotate.h deleted file mode 100644 index f6a90ffe3..000000000 --- a/source/rotate.h +++ 
/dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef LIBYUV_SOURCE_ROTATE_H_ -#define LIBYUV_SOURCE_ROTATE_H_ - -#include "libyuv/basic_types.h" - -namespace libyuv { - -void Rotate90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); -void Rotate180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); -void Rotate270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -void Rotate90_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); -void Rotate180_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); -void Rotate270_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -void Transpose(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); -} // namespace libyuv - -#endif // LIBYUV_SOURCE_ROTATE_H_ diff --git a/source/rotate_deinterleave.cc b/source/rotate_deinterleave.cc deleted file mode 100644 index 071335d73..000000000 --- a/source/rotate_deinterleave.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "rotate.h" - -namespace libyuv { - -typedef void (*reverse_func)(const uint8*, uint8*, uint8*, int); -typedef void (*rotate_wx8func)(const uint8*, int, - uint8*, int, - uint8*, int, int); -typedef void (*rotate_wxhfunc)(const uint8*, int, - uint8*, int, - uint8*, int, int, int); - -#ifdef __ARM_NEON__ -extern "C" { -void RestoreRegisters_NEON(unsigned long long *restore); -void ReverseLine_di_NEON(const uint8* src, - uint8* dst_a, uint8* dst_b, - int width); -void SaveRegisters_NEON(unsigned long long *store); -void Transpose_di_wx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width); -} // extern "C" -#endif - -static void Transpose_di_wx8_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w) { - int i, j; - for (i = 0; i < w*2; i += 2) - for (j = 0; j < 8; ++j) { - dst_a[j + (i>>1)*dst_stride_a] = src[i + j*src_stride]; - dst_b[j + (i>>1)*dst_stride_b] = src[i + j*src_stride + 1]; - } -} - -static void Transpose_di_wxh_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w, int h) { - int i, j; - for (i = 0; i < w*2; i += 2) - for (j = 0; j < h; ++j) { - dst_a[j + (i>>1)*dst_stride_a] = src[i + j*src_stride]; - dst_b[j + (i>>1)*dst_stride_b] = src[i + j*src_stride + 1]; - } -} - -void Transpose_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - int i = height; - rotate_wx8func Transpose_wx8; - rotate_wxhfunc Transpose_wxh; - - // do processor detection here. 
-#ifdef __ARM_NEON__ - unsigned long long store_reg[8]; - SaveRegisters_NEON(store_reg); - Transpose_wx8 = Transpose_di_wx8_NEON; - Transpose_wxh = Transpose_di_wxh_C; -#else - Transpose_wx8 = Transpose_di_wx8_C; - Transpose_wxh = Transpose_di_wxh_C; -#endif - - width >>= 1; - - // work across the source in 8x8 tiles - do { - Transpose_wx8(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width); - - src += 8 * src_stride; - dst_a += 8; - dst_b += 8; - i -= 8; - } while (i >= 8); - - Transpose_wxh(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, i); - -#ifdef __ARM_NEON__ - RestoreRegisters_NEON(store_reg); -#endif -} - -void Rotate90_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - src += src_stride*(height-1); - src_stride = -src_stride; - - Transpose_deinterleave(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); -} - -void Rotate270_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - dst_a += dst_stride_a*((width>>1)-1); - dst_b += dst_stride_b*((width>>1)-1); - dst_stride_a = -dst_stride_a; - dst_stride_b = -dst_stride_b; - - Transpose_deinterleave(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); -} - -static void ReverseLine_di_C(const uint8* src, - uint8* dst_a, uint8* dst_b, - int width) { - int i; - for (i = 0; i < width*2; i += 2) { - dst_a[width-1 - (i>>1)] = src[i]; - dst_b[width-1 - (i>>1)] = src[i+1]; - } -} - -void Rotate180_deinterleave(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - int i; - reverse_func ReverseLine; - - // do processor detection here. 
-#ifdef __ARM_NEON__ - ReverseLine = ReverseLine_di_NEON; -#else - ReverseLine = ReverseLine_di_C; -#endif - - dst_a += dst_stride_a*(height-1); - dst_b += dst_stride_b*(height-1); - - width >>= 1; - - for (i = 0; i < height; ++i) { - ReverseLine(src, dst_a, dst_b, width); - - src += src_stride; - dst_a -= dst_stride_a; - dst_b -= dst_stride_b; - } -} - -} // namespace libyuv diff --git a/source/rotate_deinterleave_neon.s b/source/rotate_deinterleave_neon.s deleted file mode 100644 index b5bb38517..000000000 --- a/source/rotate_deinterleave_neon.s +++ /dev/null @@ -1,310 +0,0 @@ - .global RestoreRegisters_NEON - .global ReverseLine_di_NEON - .global SaveRegisters_NEON - .global Transpose_di_wx8_NEON - .type RestoreRegisters_NEON, function - .type ReverseLine_di_NEON, function - .type SaveRegisters_NEON, function - .type Transpose_di_wx8_NEON, function - -@ void SaveRegisters_NEON (unsigned long long store) -@ r0 unsigned long long store -SaveRegisters_NEON: - vst1.i64 {d8, d9, d10, d11}, [r0]! - vst1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - -@ void RestoreRegisters_NEON (unsigned long long store) -@ r0 unsigned long long store -RestoreRegisters_NEON: - vld1.i64 {d8, d9, d10, d11}, [r0]! - vld1.i64 {d12, d13, d14, d15}, [r0]! - bx lr - - -@ void ReverseLine_NEON (const uint8* src, -@ uint8* dst_a, -@ uint8* dst_b, -@ int width) -@ r0 const uint8* src -@ r1 uint8* dst_a -@ r2 uint8* dst_b -@ r3 width -ReverseLine_di_NEON: - - @ compute where to start writing destination - add r1, r1, r3 @ dst_a + width - add r2, r2, r3 @ dst_b + width - - @ work on input segments that are multiples of 16, but - @ width that has been passed is output segments, half - @ the size of input. - lsrs r12, r3, #3 - - beq .line_residuals - - @ the output is written in to two blocks. - mov r12, #-8 - - @ back of destination by the size of the register that is - @ going to be reversed - sub r1, r1, #8 - sub r2, r2, #8 - - @ the loop needs to run on blocks of 16. 
what will be left - @ over is either a negative number, the residuals that need - @ to be done, or 0. if this isn't subtracted off here the - @ loop will run one extra time. - sub r3, r3, #8 - -.segments_of_8: - vld2.8 {d0, d1}, [r0]! @ src += 16 - - @ reverse the bytes in the 64 bit segments - vrev64.8 q0, q0 - - vst1.8 {d0}, [r1], r12 @ dst_a -= 8 - vst1.8 {d1}, [r2], r12 @ dst_b -= 8 - - subs r3, r3, #8 - bge .segments_of_8 - - @ add 16 back to the counter. if the result is 0 there is no - @ residuals so return - adds r3, r3, #8 - bxeq lr - - add r1, r1, #8 - add r2, r2, #8 - -.line_residuals: - - mov r12, #-1 - - sub r1, r1, #1 - sub r2, r2, #1 - -@ do this in neon registers as per -@ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ -.segments_of_2: - vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2 - - vst1.8 {d0[0]}, [r1], r12 @ dst_a -= 1 - vst1.8 {d1[0]}, [r2], r12 @ dst_b -= 1 - - subs r3, r3, #1 - bgt .segments_of_2 - - bx lr - -@ void Transpose_di_wx8_NEON (const uint8* src, int src_pitch, -@ uint8* dst_a, int dst_pitch_a, -@ uint8* dst_b, int dst_pitch_b, -@ int width) -@ r0 const uint8* src -@ r1 int src_pitch -@ r2 uint8* dst_a -@ r3 int dst_pitch_a -@ stack uint8* dst_b -@ stack int dst_pitch_b -@ stack int width -Transpose_di_wx8_NEON: - push {r4-r9,lr} - - ldr r4, [sp, #28] @ dst_b - ldr r5, [sp, #32] @ dst_pitch_b - ldr r7, [sp, #36] @ width - @ loops are on blocks of 8. loop will stop when - @ counter gets to or below 0. starting the counter - @ at w-8 allow for this - sub r8, #8 - -@ handle 8x8 blocks. 
this should be the majority of the plane -.loop_8x8: - mov r9, r0 - - vld2.8 {d0, d1}, [r9], r1 - vld2.8 {d2, d3}, [r9], r1 - vld2.8 {d4, d5}, [r9], r1 - vld2.8 {d6, d7}, [r9], r1 - vld2.8 {d8, d9}, [r9], r1 - vld2.8 {d10, d11}, [r9], r1 - vld2.8 {d12, d13}, [r9], r1 - vld2.8 {d14, d15}, [r9] - - vtrn.8 q1, q0 - vtrn.8 q3, q2 - vtrn.8 q5, q4 - vtrn.8 q7, q6 - - vtrn.16 q1, q3 - vtrn.16 q0, q2 - vtrn.16 q5, q7 - vtrn.16 q4, q6 - - vtrn.32 q1, q5 - vtrn.32 q0, q4 - vtrn.32 q3, q7 - vtrn.32 q2, q6 - - vrev16.8 q0, q0 - vrev16.8 q1, q1 - vrev16.8 q2, q2 - vrev16.8 q3, q3 - vrev16.8 q4, q4 - vrev16.8 q5, q5 - vrev16.8 q6, q6 - vrev16.8 q7, q7 - - mov r9, r2 - - vst1.8 {d2}, [r9], r3 - vst1.8 {d0}, [r9], r3 - vst1.8 {d6}, [r9], r3 - vst1.8 {d4}, [r9], r3 - vst1.8 {d10}, [r9], r3 - vst1.8 {d8}, [r9], r3 - vst1.8 {d14}, [r9], r3 - vst1.8 {d12}, [r9] - - mov r9, r4 - - vst1.8 {d3}, [r9], r5 - vst1.8 {d1}, [r9], r5 - vst1.8 {d7}, [r9], r5 - vst1.8 {d5}, [r9], r5 - vst1.8 {d11}, [r9], r5 - vst1.8 {d9}, [r9], r5 - vst1.8 {d15}, [r9], r5 - vst1.8 {d13}, [r9] - - add r0, #8*2 @ src += 8*2 - add r2, r3, lsl #3 @ dst_a += 8 * dst_pitch_a - add r4, r5, lsl #3 @ dst_b += 8 * dst_pitch_b - subs r8, #8 @ w -= 8 - bge .loop_8x8 - - @ add 8 back to counter. if the result is 0 there are - @ no residuals. 
- adds r8, #8 - beq .done - - @ some residual, so between 1 and 7 lines left to transpose - cmp r8, #2 - blt .block_1x8 - - cmp r8, #4 - blt .block_2x8 - -@ TODO(frkoenig) : clean this up -.block_4x8: - mov r9, r0 - vld1.64 {d0}, [r9], r1 - vld1.64 {d1}, [r9], r1 - vld1.64 {d2}, [r9], r1 - vld1.64 {d3}, [r9], r1 - vld1.64 {d4}, [r9], r1 - vld1.64 {d5}, [r9], r1 - vld1.64 {d6}, [r9], r1 - vld1.64 {d7}, [r9] - - adr r12, vtbl_4x4_transpose - vld1.8 {q7}, [r12] - - vtrn.8 q0, q1 - vtrn.8 q2, q3 - - vtbl.8 d8, {d0, d1}, d14 - vtbl.8 d9, {d0, d1}, d15 - vtbl.8 d10, {d2, d3}, d14 - vtbl.8 d11, {d2, d3}, d15 - vtbl.8 d12, {d4, d5}, d14 - vtbl.8 d13, {d4, d5}, d15 - vtbl.8 d0, {d6, d7}, d14 - vtbl.8 d1, {d6, d7}, d15 - - mov r9, r2 - - vst1.32 {d8[0]}, [r9], r3 - vst1.32 {d8[1]}, [r9], r3 - vst1.32 {d9[0]}, [r9], r3 - vst1.32 {d9[1]}, [r9], r3 - - add r9, r2, #4 - vst1.32 {d12[0]}, [r9], r3 - vst1.32 {d12[1]}, [r9], r3 - vst1.32 {d13[0]}, [r9], r3 - vst1.32 {d13[1]}, [r9] - - mov r9, r4 - - vst1.32 {d10[0]}, [r9], r5 - vst1.32 {d10[1]}, [r9], r5 - vst1.32 {d11[0]}, [r9], r5 - vst1.32 {d11[1]}, [r9], r5 - - add r9, r4, #4 - vst1.32 {d0[0]}, [r9], r5 - vst1.32 {d0[1]}, [r9], r5 - vst1.32 {d1[0]}, [r9], r5 - vst1.32 {d1[1]}, [r9] - - add r0, #4*2 @ src += 4 * 2 - add r2, r3, lsl #2 @ dst_a += 4 * dst_pitch_a - add r4, r5, lsl #2 @ dst_b += 4 * dst_pitch_b - subs r8, #4 @ w -= 4 - beq .done - - @ some residual, check to see if it includes a 2x8 block, - @ or less - cmp r8, #2 - blt .block_1x8 - -.block_2x8: - mov r9, r0 - vld2.16 {d0[0], d2[0]}, [r9], r1 - vld2.16 {d1[0], d3[0]}, [r9], r1 - vld2.16 {d0[1], d2[1]}, [r9], r1 - vld2.16 {d1[1], d3[1]}, [r9], r1 - vld2.16 {d0[2], d2[2]}, [r9], r1 - vld2.16 {d1[2], d3[2]}, [r9], r1 - vld2.16 {d0[3], d2[3]}, [r9], r1 - vld2.16 {d1[3], d3[3]}, [r9] - - vtrn.8 d0, d1 - vtrn.8 d2, d3 - - mov r9, r2 - - vst1.64 {d0}, [r9], r3 - vst1.64 {d2}, [r9] - - mov r9, r4 - - vst1.64 {d1}, [r9], r5 - vst1.64 {d3}, [r9] - - add r0, #2*2 @ src += 2 * 
2 - add r2, r3, lsl #1 @ dst_a += 2 * dst_pitch_a - add r4, r5, lsl #1 @ dst_a += 2 * dst_pitch_a - subs r8, #2 @ w -= 2 - beq .done - -.block_1x8: - vld2.8 {d0[0], d1[0]}, [r0], r1 - vld2.8 {d0[1], d1[1]}, [r0], r1 - vld2.8 {d0[2], d1[2]}, [r0], r1 - vld2.8 {d0[3], d1[3]}, [r0], r1 - vld2.8 {d0[4], d1[4]}, [r0], r1 - vld2.8 {d0[5], d1[5]}, [r0], r1 - vld2.8 {d0[6], d1[6]}, [r0], r1 - vld2.8 {d0[7], d1[7]}, [r0] - - vst1.64 {d0}, [r2] - vst1.64 {d1}, [r4] - -.done: - pop {r4-r9, pc} - -vtbl_4x4_transpose: - .byte 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 diff --git a/source/rotate_neon.s b/source/rotate_neon.s index fc29f56d7..75ea957a6 100644 --- a/source/rotate_neon.s +++ b/source/rotate_neon.s @@ -1,7 +1,15 @@ + .global RestoreRegisters_NEON .global ReverseLine_NEON - .global Transpose_wx8_NEON + .global ReverseLineUV_NEON + .global SaveRegisters_NEON + .global TransposeWx8_NEON + .global TransposeUVWx8_NEON + .type RestoreRegisters_NEON, function .type ReverseLine_NEON, function - .type Transpose_wx8_NEON, function + .type ReverseLineUV_NEON, function + .type SaveRegisters_NEON, function + .type TransposeWx8_NEON, function + .type TransposeUVWx8_NEON, function @ void ReverseLine_NEON (const uint8* src, uint8* dst, int width) @ r0 const uint8* src @@ -23,7 +31,7 @@ ReverseLine_NEON: @ along with 16 to get the next location. mov r3, #-24 - beq .line_residuals + beq Lline_residuals @ back of destination by the size of the register that is @ going to be reversed @@ -35,7 +43,7 @@ ReverseLine_NEON: @ loop will run one extra time. sub r2, #16 -.segments_of_16: +Lsegments_of_16: vld1.8 {q0}, [r0]! @ src += 16 @ reverse the bytes in the 64 bit segments. unable to reverse @@ -48,7 +56,7 @@ ReverseLine_NEON: vst1.8 {d0}, [r1], r3 @ dst -= 16 subs r2, #16 - bge .segments_of_16 + bge Lsegments_of_16 @ add 16 back to the counter. 
if the result is 0 there is no @ residuals so return @@ -57,7 +65,7 @@ ReverseLine_NEON: add r1, #16 -.line_residuals: +Lline_residuals: mov r3, #-3 @@ -65,38 +73,38 @@ ReverseLine_NEON: subs r2, #2 @ check for 16*n+1 scenarios where segments_of_2 should not @ be run, but there is something left over. - blt .segment_of_1 + blt Lsegment_of_1 @ do this in neon registers as per @ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ -.segments_of_2: +Lsegments_of_2: vld2.8 {d0[0], d1[0]}, [r0]! @ src += 2 vst1.8 {d1[0]}, [r1]! vst1.8 {d0[0]}, [r1], r3 @ dst -= 2 subs r2, #2 - bge .segments_of_2 + bge Lsegments_of_2 adds r2, #2 bxeq lr -.segment_of_1: +Lsegment_of_1: add r1, #1 vld1.8 {d0[0]}, [r0] vst1.8 {d0[0]}, [r1] bx lr -@ void Transpose_wx8_NEON (const uint8* src, int src_pitch, -@ uint8* dst, int dst_pitch, -@ int w) +@ void TransposeWx8_NEON (const uint8* src, int src_stride, +@ uint8* dst, int dst_stride, +@ int w) @ r0 const uint8* src -@ r1 int src_pitch +@ r1 int src_stride @ r2 uint8* dst -@ r3 int dst_pitch +@ r3 int dst_stride @ stack int w -Transpose_wx8_NEON: +TransposeWx8_NEON: push {r4,r8,r9,lr} ldr r8, [sp, #16] @ width @@ -107,7 +115,7 @@ Transpose_wx8_NEON: sub r8, #8 @ handle 8x8 blocks. this should be the majority of the plane -.loop_8x8: +Lloop_8x8: mov r9, r0 vld1.8 {d0}, [r9], r1 @@ -151,23 +159,23 @@ Transpose_wx8_NEON: vst1.8 {d6}, [r9] add r0, #8 @ src += 8 - add r2, r3, lsl #3 @ dst += 8 * dst_pitch + add r2, r3, lsl #3 @ dst += 8 * dst_stride subs r8, #8 @ w -= 8 - bge .loop_8x8 + bge Lloop_8x8 @ add 8 back to counter. if the result is 0 there are @ no residuals. 
adds r8, #8 - beq .done + beq Ldone @ some residual, so between 1 and 7 lines left to transpose cmp r8, #2 - blt .block_1x8 + blt Lblock_1x8 cmp r8, #4 - blt .block_2x8 + blt Lblock_2x8 -.block_4x8: +Lblock_4x8: mov r9, r0 vld1.32 {d0[0]}, [r9], r1 vld1.32 {d0[1]}, [r9], r1 @@ -202,16 +210,16 @@ Transpose_wx8_NEON: vst1.32 {d1[1]}, [r9] add r0, #4 @ src += 4 - add r2, r3, lsl #2 @ dst += 4 * dst_pitch + add r2, r3, lsl #2 @ dst += 4 * dst_stride subs r8, #4 @ w -= 4 - beq .done + beq Ldone @ some residual, check to see if it includes a 2x8 block, @ or less cmp r8, #2 - blt .block_1x8 + blt Lblock_1x8 -.block_2x8: +Lblock_2x8: mov r9, r0 vld1.16 {d0[0]}, [r9], r1 vld1.16 {d1[0]}, [r9], r1 @@ -230,11 +238,11 @@ Transpose_wx8_NEON: vst1.64 {d1}, [r9] add r0, #2 @ src += 2 - add r2, r3, lsl #1 @ dst += 2 * dst_pitch + add r2, r3, lsl #1 @ dst += 2 * dst_stride subs r8, #2 @ w -= 2 - beq .done + beq Ldone -.block_1x8: +Lblock_1x8: vld1.8 {d0[0]}, [r0], r1 vld1.8 {d0[1]}, [r0], r1 vld1.8 {d0[2]}, [r0], r1 @@ -246,9 +254,310 @@ Transpose_wx8_NEON: vst1.64 {d0}, [r2] -.done: +Ldone: pop {r4,r8,r9,pc} vtbl_4x4_transpose: .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 + +@ void SaveRegisters_NEON (unsigned long long* store) +@ r0 unsigned long long* store +SaveRegisters_NEON: + vst1.i64 {d8, d9, d10, d11}, [r0]! + vst1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + +@ void RestoreRegisters_NEON (unsigned long long* store) +@ r0 unsigned long long* store +RestoreRegisters_NEON: + vld1.i64 {d8, d9, d10, d11}, [r0]! + vld1.i64 {d12, d13, d14, d15}, [r0]! 
+ bx lr + +@ void ReverseLineUV_NEON (const uint8* src, +@ uint8* dst_a, +@ uint8* dst_b, +@ int width) +@ r0 const uint8* src +@ r1 uint8* dst_a +@ r2 uint8* dst_b +@ r3 width +ReverseLineUV_NEON: + + @ compute where to start writing destination + add r1, r1, r3 @ dst_a + width + add r2, r2, r3 @ dst_b + width + + @ work on input segments that are multiples of 16, but + @ width that has been passed is output segments, half + @ the size of input. + lsrs r12, r3, #3 + + beq Lline_residuals_di + + @ the output is written into two blocks. + mov r12, #-8 + + @ back off destination by the size of the register that is + @ going to be reversed + sub r1, r1, #8 + sub r2, r2, #8 + + @ the loop needs to run on blocks of 8. what will be left + @ over is either a negative number, the residuals that need + @ to be done, or 0. if this isn't subtracted off here the + @ loop will run one extra time. + sub r3, r3, #8 + +Lsegments_of_8_di: + vld2.8 {d0, d1}, [r0]! @ src += 16 + + @ reverse the bytes in the 64 bit segments + vrev64.8 q0, q0 + + vst1.8 {d0}, [r1], r12 @ dst_a -= 8 + vst1.8 {d1}, [r2], r12 @ dst_b -= 8 + + subs r3, r3, #8 + bge Lsegments_of_8_di + + @ add 8 back to the counter. if the result is 0 there are no + @ residuals so return + adds r3, r3, #8 + bxeq lr + + add r1, r1, #8 + add r2, r2, #8 + +Lline_residuals_di: + + mov r12, #-1 + + sub r1, r1, #1 + sub r2, r2, #1 + +@ do this in neon registers as per +@ http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ +Lsegments_of_1: + vld2.8 {d0[0], d1[0]}, [r0]! 
@ src += 2 + + vst1.8 {d0[0]}, [r1], r12 @ dst_a -= 1 + vst1.8 {d1[0]}, [r2], r12 @ dst_b -= 1 + + subs r3, r3, #1 + bgt Lsegments_of_1 + + bx lr + +@ void TransposeUVWx8_NEON (const uint8* src, int src_stride, +@ uint8* dst_a, int dst_stride_a, +@ uint8* dst_b, int dst_stride_b, +@ int width) +@ r0 const uint8* src +@ r1 int src_stride +@ r2 uint8* dst_a +@ r3 int dst_stride_a +@ stack uint8* dst_b +@ stack int dst_stride_b +@ stack int width +TransposeUVWx8_NEON: + push {r4-r9,lr} + + ldr r4, [sp, #28] @ dst_b + ldr r5, [sp, #32] @ dst_stride_b + ldr r8, [sp, #36] @ width + @ loops are on blocks of 8. loop will stop when + @ counter gets to or below 0. starting the counter + @ at w-8 allow for this + sub r8, #8 + +@ handle 8x8 blocks. this should be the majority of the plane +Lloop_8x8_di: + mov r9, r0 + + vld2.8 {d0, d1}, [r9], r1 + vld2.8 {d2, d3}, [r9], r1 + vld2.8 {d4, d5}, [r9], r1 + vld2.8 {d6, d7}, [r9], r1 + vld2.8 {d8, d9}, [r9], r1 + vld2.8 {d10, d11}, [r9], r1 + vld2.8 {d12, d13}, [r9], r1 + vld2.8 {d14, d15}, [r9] + + vtrn.8 q1, q0 + vtrn.8 q3, q2 + vtrn.8 q5, q4 + vtrn.8 q7, q6 + + vtrn.16 q1, q3 + vtrn.16 q0, q2 + vtrn.16 q5, q7 + vtrn.16 q4, q6 + + vtrn.32 q1, q5 + vtrn.32 q0, q4 + vtrn.32 q3, q7 + vtrn.32 q2, q6 + + vrev16.8 q0, q0 + vrev16.8 q1, q1 + vrev16.8 q2, q2 + vrev16.8 q3, q3 + vrev16.8 q4, q4 + vrev16.8 q5, q5 + vrev16.8 q6, q6 + vrev16.8 q7, q7 + + mov r9, r2 + + vst1.8 {d2}, [r9], r3 + vst1.8 {d0}, [r9], r3 + vst1.8 {d6}, [r9], r3 + vst1.8 {d4}, [r9], r3 + vst1.8 {d10}, [r9], r3 + vst1.8 {d8}, [r9], r3 + vst1.8 {d14}, [r9], r3 + vst1.8 {d12}, [r9] + + mov r9, r4 + + vst1.8 {d3}, [r9], r5 + vst1.8 {d1}, [r9], r5 + vst1.8 {d7}, [r9], r5 + vst1.8 {d5}, [r9], r5 + vst1.8 {d11}, [r9], r5 + vst1.8 {d9}, [r9], r5 + vst1.8 {d15}, [r9], r5 + vst1.8 {d13}, [r9] + + add r0, #8*2 @ src += 8*2 + add r2, r3, lsl #3 @ dst_a += 8 * dst_stride_a + add r4, r5, lsl #3 @ dst_b += 8 * dst_stride_b + subs r8, #8 @ w -= 8 + bge Lloop_8x8_di + + @ add 8 back 
to counter. if the result is 0 there are + @ no residuals. + adds r8, #8 + beq Ldone_di + + @ some residual, so between 1 and 7 lines left to transpose + cmp r8, #2 + blt Lblock_1x8_di + + cmp r8, #4 + blt Lblock_2x8_di + +@ TODO(frkoenig) : clean this up +Lblock_4x8_di: + mov r9, r0 + vld1.64 {d0}, [r9], r1 + vld1.64 {d1}, [r9], r1 + vld1.64 {d2}, [r9], r1 + vld1.64 {d3}, [r9], r1 + vld1.64 {d4}, [r9], r1 + vld1.64 {d5}, [r9], r1 + vld1.64 {d6}, [r9], r1 + vld1.64 {d7}, [r9] + + adr r12, vtbl_4x4_transpose_di + vld1.8 {q7}, [r12] + + vtrn.8 q0, q1 + vtrn.8 q2, q3 + + vtbl.8 d8, {d0, d1}, d14 + vtbl.8 d9, {d0, d1}, d15 + vtbl.8 d10, {d2, d3}, d14 + vtbl.8 d11, {d2, d3}, d15 + vtbl.8 d12, {d4, d5}, d14 + vtbl.8 d13, {d4, d5}, d15 + vtbl.8 d0, {d6, d7}, d14 + vtbl.8 d1, {d6, d7}, d15 + + mov r9, r2 + + vst1.32 {d8[0]}, [r9], r3 + vst1.32 {d8[1]}, [r9], r3 + vst1.32 {d9[0]}, [r9], r3 + vst1.32 {d9[1]}, [r9], r3 + + add r9, r2, #4 + vst1.32 {d12[0]}, [r9], r3 + vst1.32 {d12[1]}, [r9], r3 + vst1.32 {d13[0]}, [r9], r3 + vst1.32 {d13[1]}, [r9] + + mov r9, r4 + + vst1.32 {d10[0]}, [r9], r5 + vst1.32 {d10[1]}, [r9], r5 + vst1.32 {d11[0]}, [r9], r5 + vst1.32 {d11[1]}, [r9], r5 + + add r9, r4, #4 + vst1.32 {d0[0]}, [r9], r5 + vst1.32 {d0[1]}, [r9], r5 + vst1.32 {d1[0]}, [r9], r5 + vst1.32 {d1[1]}, [r9] + + add r0, #4*2 @ src += 4 * 2 + add r2, r3, lsl #2 @ dst_a += 4 * dst_stride_a + add r4, r5, lsl #2 @ dst_b += 4 * dst_stride_b + subs r8, #4 @ w -= 4 + beq Ldone_di + + @ some residual, check to see if it includes a 2x8 block, + @ or less + cmp r8, #2 + blt Lblock_1x8_di + +Lblock_2x8_di: + mov r9, r0 + vld2.16 {d0[0], d2[0]}, [r9], r1 + vld2.16 {d1[0], d3[0]}, [r9], r1 + vld2.16 {d0[1], d2[1]}, [r9], r1 + vld2.16 {d1[1], d3[1]}, [r9], r1 + vld2.16 {d0[2], d2[2]}, [r9], r1 + vld2.16 {d1[2], d3[2]}, [r9], r1 + vld2.16 {d0[3], d2[3]}, [r9], r1 + vld2.16 {d1[3], d3[3]}, [r9] + + vtrn.8 d0, d1 + vtrn.8 d2, d3 + + mov r9, r2 + + vst1.64 {d0}, [r9], r3 + vst1.64 {d2}, [r9] + + mov 
r9, r4 + + vst1.64 {d1}, [r9], r5 + vst1.64 {d3}, [r9] + + add r0, #2*2 @ src += 2 * 2 + add r2, r3, lsl #1 @ dst_a += 2 * dst_stride_a + add r4, r5, lsl #1 @ dst_b += 2 * dst_stride_b + subs r8, #2 @ w -= 2 + beq Ldone_di + +Lblock_1x8_di: + vld2.8 {d0[0], d1[0]}, [r0], r1 + vld2.8 {d0[1], d1[1]}, [r0], r1 + vld2.8 {d0[2], d1[2]}, [r0], r1 + vld2.8 {d0[3], d1[3]}, [r0], r1 + vld2.8 {d0[4], d1[4]}, [r0], r1 + vld2.8 {d0[5], d1[5]}, [r0], r1 + vld2.8 {d0[6], d1[6]}, [r0], r1 + vld2.8 {d0[7], d1[7]}, [r0] + + vst1.64 {d0}, [r2] + vst1.64 {d1}, [r4] + +Ldone_di: + pop {r4-r9, pc} + +vtbl_4x4_transpose_di: + .byte 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 diff --git a/source/rotate_priv.h b/source/rotate_priv.h new file mode 100644 index 000000000..b4df14941 --- /dev/null +++ b/source/rotate_priv.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef SOURCE_ROTATE_PRIV_H_ +#define SOURCE_ROTATE_PRIV_H_ + +#include "libyuv/basic_types.h" + +namespace libyuv { + +// Rotate planes by 90, 180, 270 +void +RotatePlane90(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height); + +void +RotatePlane180(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height); + +void +RotatePlane270(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height); + +void +RotateUV90(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height); + +// Rotations for when U and V are interleaved. 
+// These functions take one input pointer and +// split the data into two buffers while +// rotating them. +void +RotateUV180(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height); + +void +RotateUV270(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height); + +// The 90 and 270 functions are based on transposes. +// Doing a transpose with reversing the read/write +// order will result in a rotation by +- 90 degrees. +void +TransposePlane(const uint8* src, int src_stride, + uint8* dst, int dst_stride, + int width, int height); + +void +TransposeUV(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int width, int height); + +} // namespace libyuv + +#endif // SOURCE_ROTATE_PRIV_H_ diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc index 6245ada2d..1c295b086 100644 --- a/unit_test/rotate_test.cc +++ b/unit_test/rotate_test.cc @@ -8,9 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "libyuv/rotate.h" +#include "../source/rotate_priv.h" #include "unit_test.h" -#include "rotate.h" #include +#include using namespace libyuv; @@ -19,7 +21,7 @@ void print_array(uint8 *array, int w, int h) { for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) - printf("%4d", array[i*w + j]); + printf("%4d", (signed char)array[(i * w) + j]); printf("\n"); } @@ -39,20 +41,17 @@ TEST_F(libyuvTest, Transpose) { ow = ih; oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_1 = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_2 = static_cast(malloc(sizeof(uint8)*iw*ih)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_1 = static_cast(calloc(ow * oh, sizeof(uint8))); + output_2 = static_cast(calloc(iw * ih, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_1[i] = 0; - output_2[i] = 0; - } - Transpose(input, iw, output_1, ow, iw, ih); - Transpose(output_1, ow, output_2, oh, ow, oh); + TransposePlane(input, iw, output_1, ow, iw, ih); + TransposePlane(output_1, ow, output_2, oh, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_2[i]) err++; } @@ -76,7 +75,67 @@ TEST_F(libyuvTest, Transpose) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate90) { +TEST_F(libyuvTest, TransposeUV) { + int iw, ih, ow, oh; + int err = 0; + + for (iw = 16; iw < _rotate_max_w && !err; iw += 2) + for (ih = 8; ih < _rotate_max_h && !err; ++ih) { + int i; + uint8 *input; + uint8 *output_a1, *output_b1; + uint8 *output_a2, *output_b2; + + ow = ih; + oh = iw >> 1; + + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_a1 = static_cast(calloc(ow * oh, sizeof(uint8))); + output_b1 = static_cast(calloc(ow * oh, sizeof(uint8))); + output_a2 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_b2 = static_cast(calloc(iw * ih, sizeof(uint8))); + + for (i = 0; i < (iw * ih); i += 2) { + input[i] = i >> 1; + input[i + 1] = -(i >> 1); + } + + 
TransposeUV(input, iw, output_a1, ow, output_b1, ow, iw >> 1, ih); + + TransposePlane(output_a1, ow, output_a2, oh, ow, oh); + TransposePlane(output_b1, ow, output_b2, oh, ow, oh); + + for (i = 0; i < (iw * ih); i += 2) { + if (input[i] != output_a2[i >> 1]) + err++; + if (input[i + 1] != output_b2[i >> 1]) + err++; + } + + if (err) { + printf("input %dx%d \n", iw, ih); + print_array(input, iw, ih); + + printf("transpose 1\n"); + print_array(output_a1, ow, oh); + print_array(output_b1, ow, oh); + + printf("transpose 2\n"); + print_array(output_a2, oh, ow); + print_array(output_b2, oh, ow); + } + + free(input); + free(output_a1); + free(output_b1); + free(output_a2); + free(output_b2); + } + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, RotatePlane90) { int iw, ih, ow, oh; int err = 0; @@ -92,26 +151,21 @@ TEST_F(libyuvTest, Rotate90) { ow = ih; oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_90 = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_270 = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_90 = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_270 = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_0[i] = 0; - output_90[i] = 0; - output_180[i] = 0; - output_270[i] = 0; - } - Rotate90(input, iw, output_90, ow, iw, ih); - Rotate90(output_90, ow, output_180, oh, ow, oh); - Rotate90(output_180, oh, output_270, ow, oh, ow); - Rotate90(output_270, ow, output_0, iw, ow, oh); + RotatePlane90(input, iw, output_90, ow, iw, ih); + RotatePlane90(output_90, ow, output_180, oh, ow, oh); + RotatePlane90(output_180, oh, output_270, ow, oh, ow); + RotatePlane90(output_270, ow, 
output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -143,7 +197,7 @@ TEST_F(libyuvTest, Rotate90) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate90Deinterleave) { +TEST_F(libyuvTest, RotateUV90) { int iw, ih, ow, oh; int err = 0; @@ -159,42 +213,30 @@ TEST_F(libyuvTest, Rotate90Deinterleave) { uint8 *output_180_v; ow = ih; - oh = iw>>1; + oh = iw >> 1; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_0_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_90_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_90_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_v = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_0_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_90_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_90_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_v = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; i +=2) { - input[i] = i>>1; - input[i+1] = -(i>>1); + for (i = 0; i < (iw * ih); i += 2) { + input[i] = i >> 1; + input[i + 1] = -(i >> 1); } - for (i = 0; i < ow*oh; ++i) { - output_0_u[i] = 0; - output_0_v[i] = 0; - output_90_u[i] = 0; - output_90_v[i] = 0; - output_180_u[i] = 0; - output_180_v[i] = 0; - } + RotateUV90(input, iw, output_90_u, ow, output_90_v, ow, iw >> 1, ih); - Rotate90_deinterleave(input, iw, - output_90_u, ow, - output_90_v, ow, - iw, ih); + RotatePlane90(output_90_u, ow, output_180_u, oh, ow, oh); + RotatePlane90(output_90_v, ow, output_180_v, oh, ow, oh); - Rotate90(output_90_u, ow, output_180_u, oh, ow, oh); - Rotate90(output_90_v, ow, output_180_v, oh, ow, oh); + 
RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh); + RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh); - Rotate180(output_180_u, ow, output_0_u, ow, ow, oh); - Rotate180(output_180_v, ow, output_0_v, ow, ow, oh); - - for (i = 0; i < ow*oh; ++i) { + for (i = 0; i < (ow * oh); ++i) { if (output_0_u[i] != (uint8)i) err++; if (output_0_v[i] != (uint8)(-i)) @@ -236,7 +278,7 @@ TEST_F(libyuvTest, Rotate90Deinterleave) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate180Deinterleave) { +TEST_F(libyuvTest, RotateUV180) { int iw, ih, ow, oh; int err = 0; @@ -251,43 +293,31 @@ TEST_F(libyuvTest, Rotate180Deinterleave) { uint8 *output_180_u; uint8 *output_180_v; - ow = iw>>1; + ow = iw >> 1; oh = ih; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_0_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_90_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_90_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_v = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_0_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_90_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_90_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_v = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; i +=2) { - input[i] = i>>1; - input[i+1] = -(i>>1); + for (i = 0; i < (iw * ih); i += 2) { + input[i] = i >> 1; + input[i + 1] = -(i >> 1); } - for (i = 0; i < ow*oh; ++i) { - output_0_u[i] = 0; - output_0_v[i] = 0; - output_90_u[i] = 0; - output_90_v[i] = 0; - output_180_u[i] = 0; - output_180_v[i] = 0; - } + RotateUV180(input, iw, output_180_u, ow, output_180_v, ow, iw >> 1, ih); - Rotate180_deinterleave(input, iw, - output_180_u, 
ow, - output_180_v, ow, - iw, ih); + RotatePlane90(output_180_u, ow, output_90_u, oh, ow, oh); + RotatePlane90(output_180_v, ow, output_90_v, oh, ow, oh); - Rotate90(output_180_u, ow, output_90_u, oh, ow, oh); - Rotate90(output_180_v, ow, output_90_v, oh, ow, oh); + RotatePlane90(output_90_u, oh, output_0_u, ow, oh, ow); + RotatePlane90(output_90_v, oh, output_0_v, ow, oh, ow); - Rotate90(output_90_u, oh, output_0_u, ow, oh, ow); - Rotate90(output_90_v, oh, output_0_v, ow, oh, ow); - - for (i = 0; i < ow*oh; ++i) { + for (i = 0; i < (ow * oh); ++i) { if (output_0_u[i] != (uint8)i) err++; if (output_0_v[i] != (uint8)(-i)) @@ -329,7 +359,7 @@ TEST_F(libyuvTest, Rotate180Deinterleave) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate270Deinterleave) { +TEST_F(libyuvTest, RotateUV270) { int iw, ih, ow, oh; int err = 0; @@ -345,42 +375,31 @@ TEST_F(libyuvTest, Rotate270Deinterleave) { uint8 *output_180_v; ow = ih; - oh = iw>>1; + oh = iw >> 1; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_0_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_270_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_270_v = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_u = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180_v = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_0_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_270_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_270_v = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_u = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180_v = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; i +=2) { - input[i] = i>>1; - input[i+1] = -(i>>1); + for (i = 0; i < (iw * ih); i += 2) { + input[i] = i >> 1; + input[i + 1] = -(i >> 1); } - for (i = 0; i < ow*oh; ++i) { - output_0_u[i] = 0; - 
output_0_v[i] = 0; - output_270_u[i] = 0; - output_270_v[i] = 0; - output_180_u[i] = 0; - output_180_v[i] = 0; - } + RotateUV270(input, iw, output_270_u, ow, output_270_v, ow, + iw >> 1, ih); - Rotate270_deinterleave(input, iw, - output_270_u, ow, - output_270_v, ow, - iw, ih); + RotatePlane270(output_270_u, ow, output_180_u, oh, ow, oh); + RotatePlane270(output_270_v, ow, output_180_v, oh, ow, oh); - Rotate270(output_270_u, ow, output_180_u, oh, ow, oh); - Rotate270(output_270_v, ow, output_180_v, oh, ow, oh); + RotatePlane180(output_180_u, ow, output_0_u, ow, ow, oh); + RotatePlane180(output_180_v, ow, output_0_v, ow, ow, oh); - Rotate180(output_180_u, ow, output_0_u, ow, ow, oh); - Rotate180(output_180_v, ow, output_0_v, ow, ow, oh); - - for (i = 0; i < ow*oh; ++i) { + for (i = 0; i < (ow * oh); ++i) { if (output_0_u[i] != (uint8)i) err++; if (output_0_v[i] != (uint8)(-i)) @@ -422,7 +441,7 @@ TEST_F(libyuvTest, Rotate270Deinterleave) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate180) { +TEST_F(libyuvTest, RotatePlane180) { int iw, ih, ow, oh; int err = 0; @@ -436,20 +455,17 @@ TEST_F(libyuvTest, Rotate180) { ow = iw; oh = ih; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_180 = static_cast(malloc(sizeof(uint8)*iw*ih)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_180 = static_cast(calloc(iw * ih, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_0[i] = 0; - output_180[i] = 0; - } - Rotate180(input, iw, output_180, ow, iw, ih); - Rotate180(output_180, ow, output_0, iw, ow, oh); + RotatePlane180(input, iw, output_180, ow, iw, ih); + RotatePlane180(output_180, ow, output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -473,7 +489,7 @@ TEST_F(libyuvTest, Rotate180) { EXPECT_EQ(0, err); } 
-TEST_F(libyuvTest, Rotate270) { +TEST_F(libyuvTest, RotatePlane270) { int iw, ih, ow, oh; int err = 0; @@ -489,26 +505,21 @@ TEST_F(libyuvTest, Rotate270) { ow = ih; oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_90 = static_cast(malloc(sizeof(uint8)*ow*oh)); - output_180 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_270 = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_90 = static_cast(calloc(ow * oh, sizeof(uint8))); + output_180 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_270 = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_0[i] = 0; - output_90[i] = 0; - output_180[i] = 0; - output_270[i] = 0; - } - Rotate270(input, iw, output_270, ow, iw, ih); - Rotate270(output_270, ow, output_180, oh, ow, oh); - Rotate270(output_180, oh, output_90, ow, oh, ow); - Rotate270(output_90, ow, output_0, iw, ow, oh); + RotatePlane270(input, iw, output_270, ow, iw, ih); + RotatePlane270(output_270, ow, output_180, oh, ow, oh); + RotatePlane270(output_180, oh, output_90, ow, oh, ow); + RotatePlane270(output_90, ow, output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -540,7 +551,7 @@ TEST_F(libyuvTest, Rotate270) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate90and270) { +TEST_F(libyuvTest, RotatePlane90and270) { int iw, ih, ow, oh; int err = 0; @@ -553,20 +564,17 @@ TEST_F(libyuvTest, Rotate90and270) { ow = ih; oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_90 = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_90 = 
static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_0[i] = 0; - output_90[i] = 0; - } - Rotate90(input, iw, output_90, ow, iw, ih); - Rotate270(output_90, ow, output_0, iw, ow, oh); + RotatePlane90(input, iw, output_90, ow, iw, ih); + RotatePlane270(output_90, ow, output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -590,8 +598,8 @@ TEST_F(libyuvTest, Rotate90and270) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate90Pitch) { - int iw, ih, ow, oh; +TEST_F(libyuvTest, RotatePlane90Pitch) { + int iw, ih; int err = 0; for (iw = 16; iw < _rotate_max_w && !err; iw += 4) @@ -600,31 +608,32 @@ TEST_F(libyuvTest, Rotate90Pitch) { uint8 *input; uint8 *output_0; uint8 *output_90; - ow = ih; - oh = iw; + int ow = ih; + int oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_90 = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_90 = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_0[i] = 0; - output_90[i] = 0; - } - Rotate90(input, iw, - output_90 + (ow>>1), ow, iw>>1, ih>>1); - Rotate90(input + (iw>>1), iw, - output_90 + (ow>>1) + ow*(oh>>1), ow, iw>>1, ih>>1); - Rotate90(input + iw*(ih>>1), iw, - output_90, ow, iw>>1, ih>>1); - Rotate90(input + (iw>>1) + iw*(ih>>1), iw, - output_90 + ow*(oh>>1), ow, iw>>1, ih>>1); + RotatePlane90(input, iw, + output_90 + (ow >> 1), ow, + iw >> 1, ih >> 1); + RotatePlane90(input + (iw >> 1), iw, + output_90 + (ow >> 1) + ow * (oh >> 1), ow, + iw >> 1, ih >> 1); + RotatePlane90(input + iw * (ih >> 1), iw, + output_90, ow, + iw >> 1, ih >> 1); + RotatePlane90(input + (iw >> 1) + iw * (ih >> 1), iw, + output_90 + ow * 
(oh >> 1), ow, + iw >> 1, ih >> 1); - Rotate270(output_90, ih, output_0, iw, ow, oh); + RotatePlane270(output_90, ih, output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -648,7 +657,7 @@ TEST_F(libyuvTest, Rotate90Pitch) { EXPECT_EQ(0, err); } -TEST_F(libyuvTest, Rotate270Pitch) { +TEST_F(libyuvTest, RotatePlane270Pitch) { int iw, ih, ow, oh; int err = 0; @@ -662,27 +671,29 @@ TEST_F(libyuvTest, Rotate270Pitch) { ow = ih; oh = iw; - input = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_0 = static_cast(malloc(sizeof(uint8)*iw*ih)); - output_270 = static_cast(malloc(sizeof(uint8)*ow*oh)); + input = static_cast(calloc(iw * ih, sizeof(uint8))); + output_0 = static_cast(calloc(iw * ih, sizeof(uint8))); + output_270 = static_cast(calloc(ow * oh, sizeof(uint8))); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) input[i] = i; - output_270[i] = 0; - } - Rotate270(input, iw, - output_270 + ow*(oh>>1), ow, iw>>1, ih>>1); - Rotate270(input + (iw>>1), iw, - output_270, ow, iw>>1, ih>>1); - Rotate270(input + iw*(ih>>1), iw, - output_270 + (ow>>1) + ow*(oh>>1), ow, iw>>1, ih>>1); - Rotate270(input + (iw>>1) + iw*(ih>>1), iw, - output_270 + (ow>>1), ow, iw>>1, ih>>1); + RotatePlane270(input, iw, + output_270 + ow * (oh >> 1), ow, + iw >> 1, ih >> 1); + RotatePlane270(input + (iw >> 1), iw, + output_270, ow, + iw >> 1, ih >> 1); + RotatePlane270(input + iw * (ih >> 1), iw, + output_270 + (ow >> 1) + ow * (oh >> 1), ow, + iw >> 1, ih >> 1); + RotatePlane270(input + (iw >> 1) + iw * (ih >> 1), iw, + output_270 + (ow >> 1), ow, + iw >> 1, ih >> 1); - Rotate90(output_270, ih, output_0, iw, ow, oh); + RotatePlane90(output_270, ih, output_0, iw, ow, oh); - for (i = 0; i < iw*ih; ++i) { + for (i = 0; i < (iw * ih); ++i) { if (input[i] != output_0[i]) err++; } @@ -705,3 +716,804 @@ TEST_F(libyuvTest, Rotate270Pitch) { EXPECT_EQ(0, err); } + +TEST_F(libyuvTest, I420Rotate90) { + int err = 
0; + uint8 *orig_y, *orig_u, *orig_v; + uint8 *ro0_y, *ro0_u, *ro0_v; + uint8 *ro90_y, *ro90_u, *ro90_v; + uint8 *ro270_y, *ro270_u, *ro270_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + orig_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro0_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro0_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro0_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro90_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro90_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro90_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro270_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro270_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro270_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < (uvw + b); ++j) { + orig_u[i * (uvw + (2 * b)) + j] = random() & 0xff; + orig_v[i * (uvw + (2 * b)) + j] = random() & 0xff; + } + } + + int y_off_0 = b * (yw + (2 * b)) + b; + int uv_off_0 = b * (uvw + (2 * b)) + b; + int y_off_90 = b * (yh + (2 * b)) + b; + int uv_off_90 = b * (uvh + (2 * b)) + b; + + int y_st_0 = yw + (2 * b); + int uv_st_0 = uvw + (2 * b); + int y_st_90 = yh + (2 * b); + int uv_st_90 = uvh + (2 * b); + + I420Rotate(orig_y+y_off_0, y_st_0, + orig_u+uv_off_0, uv_st_0, + orig_v+uv_off_0, uv_st_0, + ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + yw, yh, + 
kRotateClockwise); + + I420Rotate(ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + ro270_y+y_off_90, y_st_90, + ro270_u+uv_off_90, uv_st_90, + ro270_v+uv_off_90, uv_st_90, + yh, yw, + kRotate180); + + I420Rotate(ro270_y+y_off_90, y_st_90, + ro270_u+uv_off_90, uv_st_90, + ro270_v+uv_off_90, uv_st_90, + ro0_y+y_off_0, y_st_0, + ro0_u+uv_off_0, uv_st_0, + ro0_v+uv_off_0, uv_st_0, + yh, yw, + kRotateClockwise); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != ro0_y[i]) + ++err; + } + + for (i = 0; i < uv_plane_size; ++i) { + if (orig_u[i] != ro0_u[i]) + ++err; + if (orig_v[i] != ro0_v[i]) + ++err; + } + + free(orig_y); + free(orig_u); + free(orig_v); + free(ro0_y); + free(ro0_u); + free(ro0_v); + free(ro90_y); + free(ro90_u); + free(ro90_v); + free(ro270_y); + free(ro270_u); + free(ro270_v); + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, I420Rotate270) { + int err = 0; + uint8 *orig_y, *orig_u, *orig_v; + uint8 *ro0_y, *ro0_u, *ro0_v; + uint8 *ro90_y, *ro90_u, *ro90_v; + uint8 *ro270_y, *ro270_u, *ro270_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + orig_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro0_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro0_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro0_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro90_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro90_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro90_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro270_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro270_u = static_cast(calloc(uv_plane_size, 
sizeof(uint8))); + ro270_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < (uvw + b); ++j) { + orig_u[i * (uvw + (2 * b)) + j] = random() & 0xff; + orig_v[i * (uvw + (2 * b)) + j] = random() & 0xff; + } + } + + int y_off_0 = b * (yw + (2 * b)) + b; + int uv_off_0 = b * (uvw + (2 * b)) + b; + int y_off_90 = b * (yh + (2 * b)) + b; + int uv_off_90 = b * (uvh + (2 * b)) + b; + + int y_st_0 = yw + (2 * b); + int uv_st_0 = uvw + (2 * b); + int y_st_90 = yh + (2 * b); + int uv_st_90 = uvh + (2 * b); + + I420Rotate(orig_y+y_off_0, y_st_0, + orig_u+uv_off_0, uv_st_0, + orig_v+uv_off_0, uv_st_0, + ro270_y+y_off_90, y_st_90, + ro270_u+uv_off_90, uv_st_90, + ro270_v+uv_off_90, uv_st_90, + yw, yh, + kRotateCounterClockwise); + + I420Rotate(ro270_y+y_off_90, y_st_90, + ro270_u+uv_off_90, uv_st_90, + ro270_v+uv_off_90, uv_st_90, + ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + yh, yw, + kRotate180); + + I420Rotate(ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + ro0_y+y_off_0, y_st_0, + ro0_u+uv_off_0, uv_st_0, + ro0_v+uv_off_0, uv_st_0, + yh, yw, + kRotateCounterClockwise); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != ro0_y[i]) + ++err; + } + + for (i = 0; i < uv_plane_size; ++i) { + if (orig_u[i] != ro0_u[i]) + ++err; + if (orig_v[i] != ro0_v[i]) + ++err; + } + + free(orig_y); + free(orig_u); + free(orig_v); + free(ro0_y); + free(ro0_u); + free(ro0_v); + free(ro90_y); + free(ro90_u); + free(ro90_v); + free(ro270_y); + free(ro270_u); + free(ro270_v); + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, NV12ToI420Rotate90) { + int err = 0; + uint8 *orig_y, *orig_uv; + uint8 *ro0_y, *ro0_u, *ro0_v; + uint8 *ro90_y, *ro90_u, *ro90_v; + + int yw = 1024; + int yh = 
768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + int o_uv_plane_size = ((2 * uvw) + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_uv = static_cast(calloc(o_uv_plane_size, sizeof(uint8))); + + ro0_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro0_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro0_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro90_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro90_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro90_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < ((2 * uvw) + b); j += 2) { + uint8 random_number = random() & 0x7f; + orig_uv[i * ((2 * uvw) + (2 * b)) + j] = random_number; + orig_uv[i * ((2 * uvw) + (2 * b)) + j + 1] = -random_number; + } + } + + int y_off_0 = b * (yw + (2 * b)) + b; + int uv_off_0 = b * (uvw + (2 * b)) + b; + int y_off_90 = b * (yh + (2 * b)) + b; + int uv_off_90 = b * (uvh + (2 * b)) + b; + + int y_st_0 = yw + (2 * b); + int uv_st_0 = uvw + (2 * b); + int y_st_90 = yh + (2 * b); + int uv_st_90 = uvh + (2 * b); + + NV12ToI420Rotate(orig_y+y_off_0, y_st_0, + orig_uv+y_off_0, y_st_0, + ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + yw, yh, + kRotateClockwise); + + I420Rotate(ro90_y+y_off_90, y_st_90, + ro90_u+uv_off_90, uv_st_90, + ro90_v+uv_off_90, uv_st_90, + ro0_y+y_off_0, y_st_0, + ro0_u+uv_off_0, uv_st_0, + ro0_v+uv_off_0, uv_st_0, + yh, yw, + kRotateCounterClockwise); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != ro0_y[i]) + ++err; + } + 
+ int zero_cnt = 0; + + for (i = 0; i < uv_plane_size; ++i) { + if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) + ++err; + if (ro0_u[i] != 0) + ++zero_cnt; + } + + if (!zero_cnt) + ++err; + + free(orig_y); + free(orig_uv); + free(ro0_y); + free(ro0_u); + free(ro0_v); + free(ro90_y); + free(ro90_u); + free(ro90_v); + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, NV12ToI420Rotate270) { + int err = 0; + uint8 *orig_y, *orig_uv; + uint8 *ro0_y, *ro0_u, *ro0_v; + uint8 *ro270_y, *ro270_u, *ro270_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + int o_uv_plane_size = ((2 * uvw) + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_uv = static_cast(calloc(o_uv_plane_size, sizeof(uint8))); + + ro0_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro0_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro0_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro270_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro270_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro270_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < ((2 * uvw) + b); j += 2) { + uint8 random_number = random() & 0x7f; + orig_uv[i * ((2 * uvw) + (2 * b)) + j] = random_number; + orig_uv[i * ((2 * uvw) + (2 * b)) + j + 1] = -random_number; + } + } + + int y_off_0 = b * (yw + (2 * b)) + b; + int uv_off_0 = b * (uvw + (2 * b)) + b; + int y_off_270 = b * (yh + (2 * b)) + b; + int uv_off_270 = b * (uvh + (2 * b)) + b; + + int y_st_0 = yw + (2 * b); + int uv_st_0 = uvw + (2 * b); + int y_st_270 
= yh + (2 * b); + int uv_st_270 = uvh + (2 * b); + + NV12ToI420Rotate(orig_y+y_off_0, y_st_0, + orig_uv+y_off_0, y_st_0, + ro270_y+y_off_270, y_st_270, + ro270_u+uv_off_270, uv_st_270, + ro270_v+uv_off_270, uv_st_270, + yw, yh, + kRotateCounterClockwise); + + I420Rotate(ro270_y+y_off_270, y_st_270, + ro270_u+uv_off_270, uv_st_270, + ro270_v+uv_off_270, uv_st_270, + ro0_y+y_off_0, y_st_0, + ro0_u+uv_off_0, uv_st_0, + ro0_v+uv_off_0, uv_st_0, + yh, yw, + kRotateClockwise); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != ro0_y[i]) + ++err; + } + + int zero_cnt = 0; + + for (i = 0; i < uv_plane_size; ++i) { + if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) + ++err; + if (ro0_u[i] != 0) + ++zero_cnt; + } + + if (!zero_cnt) + ++err; + + free(orig_y); + free(orig_uv); + free(ro0_y); + free(ro0_u); + free(ro0_v); + free(ro270_y); + free(ro270_u); + free(ro270_v); + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, NV12ToI420Rotate180) { + int err = 0; + uint8 *orig_y, *orig_uv; + uint8 *ro0_y, *ro0_u, *ro0_v; + uint8 *ro180_y, *ro180_u, *ro180_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + int o_uv_plane_size = ((2 * uvw) + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_uv = static_cast(calloc(o_uv_plane_size, sizeof(uint8))); + + ro0_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro0_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro0_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + ro180_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + ro180_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + ro180_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); 
++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < ((2 * uvw) + b); j += 2) { + uint8 random_number = random() & 0x7f; + orig_uv[i * ((2 * uvw) + (2 * b)) + j] = random_number; + orig_uv[i * ((2 * uvw) + (2 * b)) + j + 1] = -random_number; + } + } + + int y_off = b * (yw + (2 * b)) + b; + int uv_off = b * (uvw + (2 * b)) + b; + + int y_st = yw + (2 * b); + int uv_st = uvw + (2 * b); + + NV12ToI420Rotate(orig_y+y_off, y_st, + orig_uv+y_off, y_st, + ro180_y+y_off, y_st, + ro180_u+uv_off, uv_st, + ro180_v+uv_off, uv_st, + yw, yh, + kRotate180); + + I420Rotate(ro180_y+y_off, y_st, + ro180_u+uv_off, uv_st, + ro180_v+uv_off, uv_st, + ro0_y+y_off, y_st, + ro0_u+uv_off, uv_st, + ro0_v+uv_off, uv_st, + yw, yh, + kRotate180); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != ro0_y[i]) + ++err; + } + + int zero_cnt = 0; + + for (i = 0; i < uv_plane_size; ++i) { + if ((signed char)ro0_u[i] != -(signed char)ro0_v[i]) + ++err; + if (ro0_u[i] != 0) + ++zero_cnt; + } + + if (!zero_cnt) + ++err; + + free(orig_y); + free(orig_uv); + free(ro0_y); + free(ro0_u); + free(ro0_v); + free(ro180_y); + free(ro180_u); + free(ro180_v); + + EXPECT_EQ(0, err); +} + +TEST_F(libyuvTest, NV12ToI420RotateNegHeight90) { + int y_err = 0, uv_err = 0; + uint8 *orig_y, *orig_uv; + uint8 *roa_y, *roa_u, *roa_v; + uint8 *rob_y, *rob_u, *rob_v; + uint8 *roc_y, *roc_u, *roc_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + int o_uv_plane_size = ((2 * uvw) + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_uv = static_cast(calloc(o_uv_plane_size, sizeof(uint8))); + + roa_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + roa_u = static_cast(calloc(uv_plane_size, 
sizeof(uint8))); + roa_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + rob_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + rob_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + rob_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + roc_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + roc_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + roc_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < ((2 * uvw) + b); j += 2) { + uint8 random_number = random() & 0x7f; + orig_uv[i * ((2 * uvw) + (2 * b)) + j] = random_number; + orig_uv[i * ((2 * uvw) + (2 * b)) + j + 1] = -random_number; + } + } + + int y_off_0 = b * (yw + (2 * b)) + b; + int uv_off_0 = b * (uvw + (2 * b)) + b; + int y_off_90 = b * (yh + (2 * b)) + b; + int uv_off_90 = b * (uvh + (2 * b)) + b; + + int y_st_0 = yw + (2 * b); + int uv_st_0 = uvw + (2 * b); + int y_st_90 = yh + (2 * b); + int uv_st_90 = uvh + (2 * b); + + NV12ToI420Rotate(orig_y+y_off_0, y_st_0, + orig_uv+y_off_0, y_st_0, + roa_y+y_off_90, y_st_90, + roa_u+uv_off_90, uv_st_90, + roa_v+uv_off_90, uv_st_90, + yw, -yh, + kRotateClockwise); + + I420Rotate(roa_y+y_off_90, y_st_90, + roa_u+uv_off_90, uv_st_90, + roa_v+uv_off_90, uv_st_90, + rob_y+y_off_0, y_st_0, + rob_u+uv_off_0, uv_st_0, + rob_v+uv_off_0, uv_st_0, + yh, -yw, + kRotateCounterClockwise); + + I420Rotate(rob_y+y_off_0, y_st_0, + rob_u+uv_off_0, uv_st_0, + rob_v+uv_off_0, uv_st_0, + roc_y+y_off_0, y_st_0, + roc_u+uv_off_0, uv_st_0, + roc_v+uv_off_0, uv_st_0, + yw, yh, + kRotate180); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != roc_y[i]) + ++y_err; + } + + if (y_err) { + printf("input %dx%d \n", yw, yh); + print_array(orig_y, y_st_0, yh + (2 * b)); + + printf("rotate a\n"); + print_array(roa_y, 
y_st_90, y_st_0); + + printf("rotate b\n"); + print_array(rob_y, y_st_90, y_st_0); + + printf("rotate c\n"); + print_array(roc_y, y_st_0, y_st_90); + } + + int zero_cnt = 0; + + for (i = 0; i < uv_plane_size; ++i) { + if ((signed char)roc_u[i] != -(signed char)roc_v[i]) + ++uv_err; + if (rob_u[i] != 0) + ++zero_cnt; + } + + if (!zero_cnt) + ++uv_err; + + if (uv_err) { + printf("input %dx%d \n", (2 * uvw), uvh); + print_array(orig_uv, y_st_0, uvh + (2 * b)); + + printf("rotate a\n"); + print_array(roa_u, uv_st_90, uv_st_0); + print_array(roa_v, uv_st_90, uv_st_0); + + printf("rotate b\n"); + print_array(rob_u, uv_st_90, uv_st_0); + print_array(rob_v, uv_st_90, uv_st_0); + + printf("rotate c\n"); + print_array(roc_u, uv_st_0, uv_st_90); + print_array(roc_v, uv_st_0, uv_st_90); + } + + free(orig_y); + free(orig_uv); + free(roa_y); + free(roa_u); + free(roa_v); + free(rob_y); + free(rob_u); + free(rob_v); + free(roc_y); + free(roc_u); + free(roc_v); + + EXPECT_EQ(0, y_err + uv_err); +} + +TEST_F(libyuvTest, NV12ToI420RotateNegHeight180) { + int y_err = 0, uv_err = 0; + uint8 *orig_y, *orig_uv; + uint8 *roa_y, *roa_u, *roa_v; + uint8 *rob_y, *rob_u, *rob_v; + + int yw = 1024; + int yh = 768; + int b = 128; + int uvw = (yw + 1) >> 1; + int uvh = (yh + 1) >> 1; + int i, j; + + int y_plane_size = (yw + (2 * b)) * (yh + (2 * b)); + int uv_plane_size = (uvw + (2 * b)) * (uvh + (2 * b)); + int o_uv_plane_size = ((2 * uvw) + (2 * b)) * (uvh + (2 * b)); + + srandom(time(NULL)); + + orig_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + orig_uv = static_cast(calloc(o_uv_plane_size, sizeof(uint8))); + + roa_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + roa_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + roa_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + rob_y = static_cast(calloc(y_plane_size, sizeof(uint8))); + rob_u = static_cast(calloc(uv_plane_size, sizeof(uint8))); + rob_v = static_cast(calloc(uv_plane_size, sizeof(uint8))); + + // fill 
image buffers with random data + for (i = b; i < (yh + b); ++i) { + for (j = b; j < (yw + b); ++j) { + orig_y[i * (yw + (2 * b)) + j] = random() & 0xff; + } + } + + for (i = b; i < (uvh + b); ++i) { + for (j = b; j < ((2 * uvw) + b); j += 2) { + uint8 random_number = random() & 0x7f; + orig_uv[i * ((2 * uvw) + (2 * b)) + j] = random_number; + orig_uv[i * ((2 * uvw) + (2 * b)) + j + 1] = -random_number; + } + } + + int y_off = b * (yw + (2 * b)) + b; + int uv_off = b * (uvw + (2 * b)) + b; + + int y_st = yw + (2 * b); + int uv_st = uvw + (2 * b); + + NV12ToI420Rotate(orig_y+y_off, y_st, + orig_uv+y_off, y_st, + roa_y+y_off, y_st, + roa_u+uv_off, uv_st, + roa_v+uv_off, uv_st, + yw, -yh, + kRotate180); + + I420Rotate(roa_y+y_off, y_st, + roa_u+uv_off, uv_st, + roa_v+uv_off, uv_st, + rob_y+y_off, y_st, + rob_u+uv_off, uv_st, + rob_v+uv_off, uv_st, + yw, -yh, + kRotate180); + + for (i = 0; i < y_plane_size; ++i) { + if (orig_y[i] != rob_y[i]) + ++y_err; + } + + if (y_err) { + printf("input %dx%d \n", yw, yh); + print_array(orig_y, y_st, yh + (2 * b)); + + printf("rotate a\n"); + print_array(roa_y, y_st, yh + (2 * b)); + + printf("rotate b\n"); + print_array(rob_y, y_st, yh + (2 * b)); + } + + int zero_cnt = 0; + + for (i = 0; i < uv_plane_size; ++i) { + if ((signed char)rob_u[i] != -(signed char)rob_v[i]) + ++uv_err; + if (rob_u[i] != 0) + ++zero_cnt; + } + + if (!zero_cnt) + ++uv_err; + + if (uv_err) { + printf("input %dx%d \n", (2 * uvw), uvh); + print_array(orig_uv, y_st, uvh + (2 * b)); + + printf("rotate a\n"); + print_array(roa_u, uv_st, uvh + (2 * b)); + print_array(roa_v, uv_st, uvh + (2 * b)); + + printf("rotate b\n"); + print_array(rob_u, uv_st, uvh + (2 * b)); + print_array(rob_v, uv_st, uvh + (2 * b)); + } + + free(orig_y); + free(orig_uv); + free(roa_y); + free(roa_u); + free(roa_v); + free(rob_y); + free(rob_u); + free(rob_v); + + EXPECT_EQ(0, y_err + uv_err); +} diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h index 5265c1656..cac30c72a 100644 
--- a/unit_test/unit_test.h +++ b/unit_test/unit_test.h @@ -11,7 +11,6 @@ #ifndef UINIT_TEST_H_ #define UINIT_TEST_H_ -#include "basic_types.h" #include class libyuvTest : public ::testing::Test { @@ -20,8 +19,8 @@ class libyuvTest : public ::testing::Test { virtual void SetUp(); virtual void TearDown(); - const uint32 _rotate_max_w; - const uint32 _rotate_max_h; + const int _rotate_max_w; + const int _rotate_max_h; };