Add Intel Architecture Code Analyzer (IACA) markers

Add macros that mark the start and end of a block of code to be analyzed
by IACA.

Normally these macros should not be used, but if performance details are
wanted for Intel code, place them around the code of interest and then run
the binary through the iaca tool, available on the Intel website.

BUG=libyuv:670
TEST=~/iaca-lin64/bin/iaca.sh -64 out/Release/libyuv_unittest
R=wangcheng@google.com

Review-Url: https://codereview.chromium.org/2626193002 .
This commit is contained in:
Frank Barchard 2017-01-13 15:50:24 -08:00
parent 73a6f100a9
commit a7c87e19f0
6 changed files with 69 additions and 19 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1638
Version: 1639
License: BSD
License File: LICENSE

View File

@ -611,6 +611,57 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
#endif
#endif
// Intel Architecture Code Analyzer (IACA) markers. Insert IACA_START and
// IACA_END around the code to be measured and then run iaca on the binary,
// e.g. iaca -64 libyuv_unittest.
// IACA_ASM_START and IACA_ASM_END are equivalents that can be used within
// inline assembly blocks.
// Example of running iaca:
// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest
#if defined(__x86_64__) || defined(__i386__)
// String fragment to paste into an inline assembly template: emits the bytes
// 0x0F 0x0B, loads 111 into ebx, then emits the bytes 0x64 0x67 0x90.
// The register is written as %%ebx, i.e. escaped for an asm template that
// takes operands.
// NOTE(review): this overwrites ebx; presumably the enclosing asm statement
// should list ebx as clobbered - confirm at each use site.
#define IACA_ASM_START \
".byte 0x0F, 0x0B\n" \
" movl $111, %%ebx\n" \
".byte 0x64, 0x67, 0x90\n"
// String fragment mirroring IACA_ASM_START in reverse order: loads 222 into
// ebx, emits the bytes 0x64 0x67 0x90, then emits the bytes 0x0F 0x0B.
#define IACA_ASM_END \
" movl $222, %%ebx\n" \
".byte 0x64, 0x67, 0x90\n" \
".byte 0x0F, 0x0B\n"
// Standalone asm statement that loads MARK_ID into ebx and emits the bytes
// 0x64 0x67 0x90. MARK_ID is stringized into the instruction text, so it must
// be a literal number. The expansion already ends with ';'.
// NOTE(review): ebx is written but only "memory" is listed as a clobber -
// confirm this is safe for the surrounding compiled code.
#define IACA_SSC_MARK(MARK_ID) \
__asm__ __volatile__("\n\t movl $" #MARK_ID \
", %%ebx" \
"\n\t .byte 0x64, 0x67, 0x90" \
: \
: \
: "memory");
// Standalone asm statement that emits the two bytes 0x0F 0x0B.
#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B");
#else /* Visual C */
// Visual C inline assembly equivalents of the two macros above, written with
// __asm / _emit.
#define IACA_UD_BYTES \
{ __asm _emit 0x0F __asm _emit 0x0B }
#define IACA_SSC_MARK(x) \
{ __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 }
// Intrinsic-based start/end markers. These are defined only in this branch
// and are not used by IACA_START / IACA_END below.
// NOTE(review): presumably intended for 64-bit Visual C builds, where __asm
// blocks are unavailable - confirm.
#define IACA_VC64_START __writegsbyte(111, 111);
#define IACA_VC64_END __writegsbyte(222, 222);
#endif
// Begin an analyzed region: the 0x0F 0x0B bytes followed by mark 111.
// Expands to a complete brace-enclosed block.
#define IACA_START \
{ \
IACA_UD_BYTES \
IACA_SSC_MARK(111) \
}
// End an analyzed region: mark 222 followed by the 0x0F 0x0B bytes.
// Expands to a complete brace-enclosed block.
#define IACA_END \
{ \
IACA_SSC_MARK(222) \
IACA_UD_BYTES \
}
void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1638
#define LIBYUV_VERSION 1639
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -257,9 +257,8 @@ int ConvertToARGB(const uint8* sample,
free(rotate_buffer);
} else if (rotation) {
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBRotate(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height, rotation);
r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width,
inv_crop_height, rotation);
}
return r;

View File

@ -2112,6 +2112,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
STOREARGB_AVX2
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
@ -5458,7 +5459,7 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
}
#endif // HAS_HALFFLOATROW_AVX2
#ifdef HAS_HALFFLOATROW_F16C
//#ifdef HAS_HALFFLOATROW_F16C
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
asm volatile (
"vbroadcastss %3, %%ymm4 \n"
@ -5490,7 +5491,7 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
"xmm2", "xmm3", "xmm4"
);
}
#endif // HAS_HALFFLOATROW_F16C
//#endif // HAS_HALFFLOATROW_F16C
#ifdef HAS_HALFFLOATROW_F16C
void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {

View File

@ -1925,18 +1925,17 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
dst[2] = 0x00000000;
dst[3] = 0x00000000;
int r = ConvertToARGB(
reinterpret_cast<uint8_t*>(src),
16, // input size
reinterpret_cast<uint8_t*>(dst),
8, // destination stride
0, // crop_x
0, // crop_y
2, // width
2, // height
2, // crop width
2, // crop height
kRotate90, FOURCC_ARGB);
int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src),
16, // input size
reinterpret_cast<uint8_t*>(dst),
8, // destination stride
0, // crop_x
0, // crop_y
2, // width
2, // height
2, // crop width
2, // crop height
kRotate90, FOURCC_ARGB);
EXPECT_EQ(r, 0);
// 90 degrees rotation, no conversion