Add vzeroupper to AVX row functions

- Move the IS_POWEROFTWO macro out of row.h into the planar functions source
- Revert the IS_ALIGNED formatting change in row.h

Bug: b/258474032
Change-Id: If87bb8d55c9b9930dd3e378614f8e4faae0870e9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4035166
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2022-11-17 11:36:30 -08:00 committed by libyuv LUCI CQ
parent 2d2cee418a
commit 8713ba3f0b
5 changed files with 24 additions and 21 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1849
Version: 1850
License: BSD
License File: LICENSE

View File

@ -825,8 +825,7 @@ struct YuvConstants {
#endif
#define IS_POWEROFTWO(x) (!((x) & ((x) - 1)))
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
#define align_buffer_64(var, size) \
uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1849
#define LIBYUV_VERSION 1850
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -913,6 +913,9 @@ int NV21ToNV12(const uint8_t* src_y,
return 0;
}
// Test if x is a positive power of 2 (tile_height is expected to be 16 or 32).
// Zero and negative values are rejected: without the (x) > 0 guard, 0 would
// pass the bit test because (0 & -1) == 0.
// Note: x is evaluated more than once, so pass a side-effect-free expression.
#define IS_POWEROFTWO(x) ((x) > 0 && !((x) & ((x)-1)))
// Detile a plane of data
// tile width is 16 and assumed.
// tile_height is 16 or 32 for MM21.
@ -930,7 +933,8 @@ int DetilePlane(const uint8_t* src_y,
int y;
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
int width) = DetileRow_C;
if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) {
if (!src_y || !dst_y || width <= 0 || height == 0 ||
!IS_POWEROFTWO(tile_height)) {
return -1;
}
@ -986,7 +990,8 @@ int DetilePlane_16(const uint16_t* src_y,
int y;
void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
uint16_t* dst, int width) = DetileRow_16_C;
if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) {
if (!src_y || !dst_y || width <= 0 || height == 0 ||
!IS_POWEROFTWO(tile_height)) {
return -1;
}

View File

@ -1210,6 +1210,7 @@ void ARGBToAR64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ar64), // %1
"+r"(width) // %2
@ -1237,6 +1238,7 @@ void ARGBToAB64Row_AVX2(const uint8_t* src_argb,
"lea 0x40(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_ab64), // %1
"+r"(width) // %2
@ -1265,6 +1267,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ar64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@ -1293,6 +1296,7 @@ void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
"lea 0x20(%1),%1 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ab64), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
@ -1457,9 +1461,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
"vmovdqu %6,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(ymm7)
"vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm7) "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@ -1479,9 +1482,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vbroadcastf128 %5,%%ymm7 \n"
"vmovdqu %6,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(ymm7)
"vmovdqu %6,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm7) "vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@ -1500,9 +1502,8 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(ymm5)
"vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm5) "vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@ -1520,9 +1521,8 @@ void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(ymm5)
"vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm5) "vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
@ -1540,9 +1540,7 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(
"vmovdqu %5,%%ymm6 \n" LABELALIGN RGBTOY_AVX2(
ymm5) "vzeroupper \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
@ -6479,6 +6477,7 @@ void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) {
"lea 0x40(%1),%1 \n"
"sub $0x40,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2