From a7c87e19f010f109a03e99abfe664ba35d8b8e62 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Fri, 13 Jan 2017 15:50:24 -0800 Subject: [PATCH] add Intel Code Analyst markers add macros to enable/disable code analyst around blocks of code. Normally these macros should not be used, but if performance details are wanted for intel code, enable them around the code and then run via the iaca tool, available on the intel website. BUG=libyuv:670 TEST=~/iaca-lin64/bin/iaca.sh -64 out/Release/libyuv_unittest R=wangcheng@google.com Review-Url: https://codereview.chromium.org/2626193002 . --- README.chromium | 2 +- include/libyuv/row.h | 51 +++++++++++++++++++++++++++++++++++++++ include/libyuv/version.h | 2 +- source/convert_to_argb.cc | 5 ++-- source/row_gcc.cc | 5 ++-- unit_test/convert_test.cc | 23 +++++++++--------- 6 files changed, 69 insertions(+), 19 deletions(-) diff --git a/README.chromium b/README.chromium index 7f12f0cec..fac6b4781 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1638 +Version: 1639 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index a182c7a25..ef5b952f4 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -611,6 +611,57 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #endif #endif +// Intel Code Analyzer markers. Insert IACA_START IACA_END around code to be +// measured and then run with iaca -64 libyuv_unittest. +// IACA_ASM_START and IACA_ASM_END are equivalents that can be used within +// inline assembly blocks. 
+// example of iaca: +// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest + +#if defined(__x86_64__) || defined(__i386__) + +#define IACA_ASM_START \ + ".byte 0x0F, 0x0B\n" \ + " movl $111, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" + +#define IACA_ASM_END \ + " movl $222, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" \ + ".byte 0x0F, 0x0B\n" + +#define IACA_SSC_MARK(MARK_ID) \ + __asm__ __volatile__("\n\t movl $" #MARK_ID \ + ", %%ebx" \ + "\n\t .byte 0x64, 0x67, 0x90" \ + : \ + : \ + : "memory"); + +#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B"); + +#else /* Visual C */ +#define IACA_UD_BYTES \ + { __asm _emit 0x0F __asm _emit 0x0B } + +#define IACA_SSC_MARK(x) \ + { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 } + +#define IACA_VC64_START __writegsbyte(111, 111); +#define IACA_VC64_END __writegsbyte(222, 222); +#endif + +#define IACA_START \ + { \ + IACA_UD_BYTES \ + IACA_SSC_MARK(111) \ + } +#define IACA_END \ + { \ + IACA_SSC_MARK(222) \ + IACA_UD_BYTES \ + } + void I444ToARGBRow_NEON(const uint8* src_y, const uint8* src_u, const uint8* src_v, diff --git a/include/libyuv/version.h b/include/libyuv/version.h index d9ef2fe4d..67a22493d 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1638 +#define LIBYUV_VERSION 1639 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/convert_to_argb.cc b/source/convert_to_argb.cc index f9e0e808a..63a5104b3 100644 --- a/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc @@ -257,9 +257,8 @@ int ConvertToARGB(const uint8* sample, free(rotate_buffer); } else if (rotation) { src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBRotate(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height, rotation); + r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width, + inv_crop_height, rotation); } 
return r; diff --git a/source/row_gcc.cc b/source/row_gcc.cc index dccf0135b..4f6bd8dde 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2112,6 +2112,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, STOREARGB_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" + "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] @@ -5458,7 +5459,7 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { } #endif // HAS_HALFFLOATROW_AVX2 -#ifdef HAS_HALFFLOATROW_F16C +//#ifdef HAS_HALFFLOATROW_F16C void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { asm volatile ( "vbroadcastss %3, %%ymm4 \n" @@ -5490,7 +5491,7 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { "xmm2", "xmm3", "xmm4" ); } -#endif // HAS_HALFFLOATROW_F16C +//#endif // HAS_HALFFLOATROW_F16C #ifdef HAS_HALFFLOATROW_F16C void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) { diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index e471a8e32..3e55789a8 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1925,18 +1925,17 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) { dst[2] = 0x00000000; dst[3] = 0x00000000; - int r = ConvertToARGB( - reinterpret_cast(src), - 16, // input size - reinterpret_cast(dst), - 8, // destination stride - 0, // crop_x - 0, // crop_y - 2, // width - 2, // height - 2, // crop width - 2, // crop height - kRotate90, FOURCC_ARGB); + int r = ConvertToARGB(reinterpret_cast(src), + 16, // input size + reinterpret_cast(dst), + 8, // destination stride + 0, // crop_x + 0, // crop_y + 2, // width + 2, // height + 2, // crop width + 2, // crop height + kRotate90, FOURCC_ARGB); EXPECT_EQ(r, 0); // 90 degrees rotation, no conversion