Add CopyPlane_Unaligned, _Any and _Invert tests/benchmarks

CpuId test

- Add AMD_ERMSB detect for ERMS on AMD

Bug: 379457420
Change-Id: I608568556024faf19abe4d0662aeeee553a0a349
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6032852
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2024-11-19 13:47:14 -08:00
parent 1c501a8f3f
commit 307b951229
4 changed files with 61 additions and 79 deletions

View File

@ -31,12 +31,12 @@ static const int kCpuHasSVE2 = 0x1000;
static const int kCpuHasSME = 0x2000;
// These flags are only valid on RISCV processors.
static const int kCpuHasRISCV = 0x8;
static const int kCpuHasRISCV = 0x4;
static const int kCpuHasRVV = 0x100;
static const int kCpuHasRVVZVFH = 0x200;
// These flags are only valid on x86 processors.
static const int kCpuHasX86 = 0x10;
static const int kCpuHasX86 = 0x8;
static const int kCpuHasSSE2 = 0x100;
static const int kCpuHasSSSE3 = 0x200;
static const int kCpuHasSSE41 = 0x400;
@ -59,11 +59,11 @@ static const int kCpuHasAVXVNNIINT8 = 0x4000000;
static const int kCpuHasAMXINT8 = 0x8000000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x20;
static const int kCpuHasMIPS = 0x10;
static const int kCpuHasMSA = 0x100;
// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x40;
static const int kCpuHasLOONGARCH = 0x20;
static const int kCpuHasLSX = 0x100;
static const int kCpuHasLASX = 0x200;

View File

@ -409,11 +409,13 @@ static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info1[4] = {0, 0, 0, 0};
int cpu_info7[4] = {0, 0, 0, 0};
int cpu_einfo7[4] = {0, 0, 0, 0};
int cpu_amdinfo21[4] = {0, 0, 0, 0};
CpuId(0, 0, cpu_info0);
CpuId(1, 0, cpu_info1);
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
CpuId(7, 1, cpu_einfo7);
CpuId(0x80000021, 0, cpu_amdinfo21);
}
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
@ -431,6 +433,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);
cpu_info |= ((cpu_amdinfo21[0] & 0x00008000) ? kCpuHasERMS : 0);
// Detect AVX512bw
if ((GetXCR0() & 0xe0) == 0xe0) {
cpu_info |= ((cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0) |

View File

@ -1551,98 +1551,75 @@ TEST_F(LibYUVPlanarTest, TestAffine) {
#endif
}
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
int err = 0;
int yw = benchmark_width_;
int yh = benchmark_height_;
int b = 12;
int i, j;
int y_plane_size = (yw + b * 2) * (yh + b * 2);
align_buffer_page_end(orig_y, y_plane_size);
static int TestCopyPlane(int width,
int height,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info,
int invert,
int off) {
int y_plane_size = width * height;
align_buffer_page_end(orig_y, y_plane_size + off);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
memset(orig_y, 0, y_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Fill image buffers with random data.
for (i = b; i < (yh + b); ++i) {
for (j = b; j < (yw + b); ++j) {
orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff;
}
}
// Fill destination buffers with random data.
for (i = 0; i < y_plane_size; ++i) {
uint8_t random_number = fastrand() & 0x7f;
dst_c[i] = random_number;
dst_opt[i] = dst_c[i];
}
int y_off = b * (yw + b * 2) + b;
int y_st = yw + b * 2;
int stride = 8;
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (j = 0; j < benchmark_iterations_; j++) {
CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (j = 0; j < benchmark_iterations_; j++) {
CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
}
for (i = 0; i < y_plane_size; ++i) {
if (dst_c[i] != dst_opt[i]) {
++err;
}
}
free_aligned_buffer_page_end(orig_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
EXPECT_EQ(0, err);
}
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
int i;
int y_plane_size = benchmark_width_ * benchmark_height_;
align_buffer_page_end(orig_y, y_plane_size);
align_buffer_page_end(dst_c, y_plane_size);
align_buffer_page_end(dst_opt, y_plane_size);
MemRandomize(orig_y, y_plane_size);
MemRandomize(orig_y + off, y_plane_size);
memset(dst_c, 1, y_plane_size);
memset(dst_opt, 2, y_plane_size);
// Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_);
for (i = 0; i < benchmark_iterations_; i++) {
CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_,
benchmark_width_, benchmark_height_);
MaskCpuFlags(disable_cpu_flags);
for (int i = 0; i < benchmark_iterations; i++) {
CopyPlane(orig_y + off, width, dst_c, width, width, height * invert);
}
// Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_);
for (i = 0; i < benchmark_iterations_; i++) {
CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_,
benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info);
for (int i = 0; i < benchmark_iterations; i++) {
CopyPlane(orig_y + off, width, dst_opt, width, width, height * invert);
}
for (i = 0; i < y_plane_size; ++i) {
EXPECT_EQ(dst_c[i], dst_opt[i]);
int max_diff = 0;
for (int i = 0; i < y_plane_size; ++i) {
int abs_diff = abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i]));
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
free_aligned_buffer_page_end(orig_y);
free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt);
return max_diff;
}
// Runs TestCopyPlane on a plane that is one pixel wider than benchmark_width_
// (invert = +1, off = 0).
// NOTE(review): presumably the odd width is meant to exercise the "Any"
// (remainder-handling) row functions - confirm against the CopyPlane
// implementation.
TEST_F(LibYUVPlanarTest, CopyPlane_Any) {
int max_diff = TestCopyPlane(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
// max_diff is a maximum of absolute differences, so "<= 0" requires the
// output produced with CPU optimizations disabled and enabled to be identical.
EXPECT_LE(max_diff, 0);
}
// Runs TestCopyPlane with off = 1, so the source pointer handed to CopyPlane
// is one byte past the start of the allocated source buffer
// (invert = +1, benchmark-sized plane).
TEST_F(LibYUVPlanarTest, CopyPlane_Unaligned) {
int max_diff =
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
// max_diff is a maximum of absolute differences, so "<= 0" requires the
// output produced with CPU optimizations disabled and enabled to be identical.
EXPECT_LE(max_diff, 0);
}
// Runs TestCopyPlane with invert = -1; the helper multiplies the height by
// this value, so CopyPlane receives a negative height (off = 0).
// NOTE(review): a negative height presumably requests a vertically flipped
// copy - confirm against the CopyPlane documentation.
TEST_F(LibYUVPlanarTest, CopyPlane_Invert) {
int max_diff =
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
// max_diff is a maximum of absolute differences, so "<= 0" requires the
// output produced with CPU optimizations disabled and enabled to be identical.
EXPECT_LE(max_diff, 0);
}
// Baseline case: runs TestCopyPlane on a benchmark-sized plane with a
// positive height (invert = +1) and no source offset (off = 0).
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
int max_diff =
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
// max_diff is a maximum of absolute differences, so "<= 0" requires the
// output produced with CPU optimizations disabled and enabled to be identical.
EXPECT_LE(max_diff, 0);
}
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {

View File

@ -192,6 +192,7 @@ int main(int argc, const char* argv[]) {
printf("Has AVX 0x%x\n", has_avx);
printf("Has AVX2 0x%x\n", has_avx2);
printf("Has ERMS 0x%x\n", has_erms);
printf("Has FSMR 0x%x\n", has_fsmr);
printf("Has FMA3 0x%x\n", has_fma3);
printf("Has F16C 0x%x\n", has_f16c);
printf("Has AVX512BW 0x%x\n", has_avx512bw);