mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-02-14 22:29:52 +08:00
Port I411 movzx 2-byte reader to gcc
Previously the I411 format used movd to read U, V pixels, but this reads 4 bytes and can cause a memory exception. pinsrw can be used instead, but it fails on drmemory 1.5 and is slow. So in this change a movzxw is used to read 2 bytes into EBX, which is then copied to an XMM register (xmm0 for U, xmm1 for V) with movd. Slightly slower, but no memory exception. Was: LibYUVConvertTest.I411ToARGB_Opt (577 ms). Now: LibYUVConvertTest.I411ToARGB_Opt (608 ms). TBR=harryjin@google.com BUG=libyuv:525 Review URL: https://codereview.chromium.org/1457783004 .
This commit is contained in:
parent
5eefbe2330
commit
50f8cb2db3
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1540
|
Version: 1541
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1540
|
#define LIBYUV_VERSION 1541
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -1518,10 +1518,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
|||||||
"movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
|
"movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
|
||||||
"lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
|
"lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
|
||||||
|
|
||||||
// Read 2 UV from 411, upsample to 8 UV
|
// Read 2 UV from 411, upsample to 8 UV.
|
||||||
#define READYUV411 \
|
// reading 4 bytes is an msan violation.
|
||||||
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
// "movd " MEMACCESS([u_buf]) ",%%xmm0 \n"
|
||||||
MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
// MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)
|
||||||
|
// pinsrw fails with drmemory
|
||||||
|
// __asm pinsrw xmm0, [esi], 0 /* U */
|
||||||
|
// __asm pinsrw xmm1, [esi + edi], 0 /* V */
|
||||||
|
#define READYUV411_EBX \
|
||||||
|
"movzw " MEMACCESS([u_buf]) ",%%ebx \n" \
|
||||||
|
"movd %%ebx,%%xmm0 \n" \
|
||||||
|
MEMOPREG(movzw,0x00,[u_buf],[v_buf],1,ebx) " \n" \
|
||||||
|
"movd %%ebx,%%xmm1 \n" \
|
||||||
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
|
"lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
|
||||||
"punpcklbw %%xmm1,%%xmm0 \n" \
|
"punpcklbw %%xmm1,%%xmm0 \n" \
|
||||||
"punpcklwd %%xmm0,%%xmm0 \n" \
|
"punpcklwd %%xmm0,%%xmm0 \n" \
|
||||||
@ -1800,7 +1808,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
READYUV411
|
READYUV411_EBX
|
||||||
YUVTORGB(yuvconstants)
|
YUVTORGB(yuvconstants)
|
||||||
STOREARGB
|
STOREARGB
|
||||||
"sub $0x8,%[width] \n"
|
"sub $0x8,%[width] \n"
|
||||||
@ -1811,7 +1819,7 @@ void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
|
|||||||
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
[dst_argb]"+r"(dst_argb), // %[dst_argb]
|
||||||
[width]"+rm"(width) // %[width]
|
[width]"+rm"(width) // %[width]
|
||||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||||
: "memory", "cc", NACL_R14 YUVTORGB_REGS
|
: "memory", "cc", "ebx", NACL_R14 YUVTORGB_REGS
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user