mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Add Intel Architecture Code Analyzer (IACA) markers
Add macros to place Intel Architecture Code Analyzer (IACA) start/end markers around blocks of code. Normally these macros should not be used, but if performance details are wanted for Intel code, enable them around the code of interest and then run the binary through the iaca tool, available on the Intel website. BUG=libyuv:670 TEST=~/iaca-lin64/bin/iaca.sh -64 out/Release/libyuv_unittest R=wangcheng@google.com Review-Url: https://codereview.chromium.org/2626193002 .
This commit is contained in:
parent
73a6f100a9
commit
a7c87e19f0
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1638
|
||||
Version: 1639
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -611,6 +611,57 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Intel Code Analyzer markers. Insert IACA_START and IACA_END around code to be
|
||||
// measured and then run with iaca -64 libyuv_unittest.
|
||||
// IACA_ASM_START and IACA_ASM_END are equivalents that can be used within
|
||||
// inline assembly blocks.
|
||||
// example of iaca:
|
||||
// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#define IACA_ASM_START \
|
||||
".byte 0x0F, 0x0B\n" \
|
||||
" movl $111, %%ebx\n" \
|
||||
".byte 0x64, 0x67, 0x90\n"
|
||||
|
||||
#define IACA_ASM_END \
|
||||
" movl $222, %%ebx\n" \
|
||||
".byte 0x64, 0x67, 0x90\n" \
|
||||
".byte 0x0F, 0x0B\n"
|
||||
|
||||
#define IACA_SSC_MARK(MARK_ID) \
|
||||
__asm__ __volatile__("\n\t movl $" #MARK_ID \
|
||||
", %%ebx" \
|
||||
"\n\t .byte 0x64, 0x67, 0x90" \
|
||||
: \
|
||||
: \
|
||||
: "memory");
|
||||
|
||||
#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B");
|
||||
|
||||
#else /* Visual C */
|
||||
#define IACA_UD_BYTES \
|
||||
{ __asm _emit 0x0F __asm _emit 0x0B }
|
||||
|
||||
#define IACA_SSC_MARK(x) \
|
||||
{ __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 }
|
||||
|
||||
#define IACA_VC64_START __writegsbyte(111, 111);
|
||||
#define IACA_VC64_END __writegsbyte(222, 222);
|
||||
#endif
|
||||
|
||||
#define IACA_START \
|
||||
{ \
|
||||
IACA_UD_BYTES \
|
||||
IACA_SSC_MARK(111) \
|
||||
}
|
||||
#define IACA_END \
|
||||
{ \
|
||||
IACA_SSC_MARK(222) \
|
||||
IACA_UD_BYTES \
|
||||
}
|
||||
|
||||
void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1638
|
||||
#define LIBYUV_VERSION 1639
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -257,9 +257,8 @@ int ConvertToARGB(const uint8* sample,
|
||||
free(rotate_buffer);
|
||||
} else if (rotation) {
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBRotate(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
r = ARGBRotate(src, src_width * 4, crop_argb, argb_stride, crop_width,
|
||||
inv_crop_height, rotation);
|
||||
}
|
||||
|
||||
return r;
|
||||
|
||||
@ -2112,6 +2112,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
STOREARGB_AVX2
|
||||
"sub $0x10,%[width] \n"
|
||||
"jg 1b \n"
|
||||
|
||||
"vzeroupper \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
@ -5458,7 +5459,7 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
}
|
||||
#endif // HAS_HALFFLOATROW_AVX2
|
||||
|
||||
#ifdef HAS_HALFFLOATROW_F16C
|
||||
//#ifdef HAS_HALFFLOATROW_F16C
|
||||
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
|
||||
asm volatile (
|
||||
"vbroadcastss %3, %%ymm4 \n"
|
||||
@ -5490,7 +5491,7 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
|
||||
"xmm2", "xmm3", "xmm4"
|
||||
);
|
||||
}
|
||||
#endif // HAS_HALFFLOATROW_F16C
|
||||
//#endif // HAS_HALFFLOATROW_F16C
|
||||
|
||||
#ifdef HAS_HALFFLOATROW_F16C
|
||||
void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {
|
||||
|
||||
@ -1925,18 +1925,17 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
|
||||
dst[2] = 0x00000000;
|
||||
dst[3] = 0x00000000;
|
||||
|
||||
int r = ConvertToARGB(
|
||||
reinterpret_cast<uint8_t*>(src),
|
||||
16, // input size
|
||||
reinterpret_cast<uint8_t*>(dst),
|
||||
8, // destination stride
|
||||
0, // crop_x
|
||||
0, // crop_y
|
||||
2, // width
|
||||
2, // height
|
||||
2, // crop width
|
||||
2, // crop height
|
||||
kRotate90, FOURCC_ARGB);
|
||||
int r = ConvertToARGB(reinterpret_cast<uint8_t*>(src),
|
||||
16, // input size
|
||||
reinterpret_cast<uint8_t*>(dst),
|
||||
8, // destination stride
|
||||
0, // crop_x
|
||||
0, // crop_y
|
||||
2, // width
|
||||
2, // height
|
||||
2, // crop width
|
||||
2, // crop height
|
||||
kRotate90, FOURCC_ARGB);
|
||||
|
||||
EXPECT_EQ(r, 0);
|
||||
// 90 degrees rotation, no conversion
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user