From 307b951229cc2eb756af09ea111f7be6ff065d4f Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 19 Nov 2024 13:47:14 -0800 Subject: [PATCH] Add CopyPlane_Unaligned, _Any and _Invert tests/benchmarks CpuId test - Add AMD_ERMSB detect for ERMS on AMD Bug: 379457420 Change-Id: I608568556024faf19abe4d0662aeeee553a0a349 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6032852 Reviewed-by: Wan-Teh Chang --- include/libyuv/cpu_id.h | 8 +-- source/cpu_id.cc | 4 ++ unit_test/planar_test.cc | 127 ++++++++++++++++----------------------- util/cpuid.c | 1 + 4 files changed, 61 insertions(+), 79 deletions(-) diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 722ec3276..57a6a12e9 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -31,12 +31,12 @@ static const int kCpuHasSVE2 = 0x1000; static const int kCpuHasSME = 0x2000; // These flags are only valid on RISCV processors. -static const int kCpuHasRISCV = 0x8; +static const int kCpuHasRISCV = 0x4; static const int kCpuHasRVV = 0x100; static const int kCpuHasRVVZVFH = 0x200; // These flags are only valid on x86 processors. -static const int kCpuHasX86 = 0x10; +static const int kCpuHasX86 = 0x8; static const int kCpuHasSSE2 = 0x100; static const int kCpuHasSSSE3 = 0x200; static const int kCpuHasSSE41 = 0x400; @@ -59,11 +59,11 @@ static const int kCpuHasAVXVNNIINT8 = 0x4000000; static const int kCpuHasAMXINT8 = 0x8000000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x20; +static const int kCpuHasMIPS = 0x10; static const int kCpuHasMSA = 0x100; // These flags are only valid on LOONGARCH processors. 
-static const int kCpuHasLOONGARCH = 0x40; +static const int kCpuHasLOONGARCH = 0x20; static const int kCpuHasLSX = 0x100; static const int kCpuHasLASX = 0x200; diff --git a/source/cpu_id.cc b/source/cpu_id.cc index e4d59a052..48a350c6e 100644 --- a/source/cpu_id.cc +++ b/source/cpu_id.cc @@ -409,11 +409,13 @@ static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; int cpu_einfo7[4] = {0, 0, 0, 0}; + int cpu_amdinfo21[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); CpuId(7, 1, cpu_einfo7); + CpuId(0x80000021, 0, cpu_amdinfo21); } cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | @@ -431,6 +433,8 @@ static SAFEBUFFERS int GetCpuFlags(void) { ((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) | ((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0); + cpu_info |= ((cpu_amdinfo21[0] & 0x00008000) ? kCpuHasERMS : 0); + // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { cpu_info |= ((cpu_info7[1] & 0x40000000) ? 
kCpuHasAVX512BW : 0) | diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 97f89f667..7b5a3e2d7 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -1551,98 +1551,75 @@ TEST_F(LibYUVPlanarTest, TestAffine) { #endif } -TEST_F(LibYUVPlanarTest, TestCopyPlane) { - int err = 0; - int yw = benchmark_width_; - int yh = benchmark_height_; - int b = 12; - int i, j; - - int y_plane_size = (yw + b * 2) * (yh + b * 2); - align_buffer_page_end(orig_y, y_plane_size); +static int TestCopyPlane(int width, + int height, + int benchmark_iterations, + int disable_cpu_flags, + int benchmark_cpu_info, + int invert, + int off) { + int y_plane_size = width * height; + align_buffer_page_end(orig_y, y_plane_size + off); align_buffer_page_end(dst_c, y_plane_size); align_buffer_page_end(dst_opt, y_plane_size); - memset(orig_y, 0, y_plane_size); - memset(dst_c, 0, y_plane_size); - memset(dst_opt, 0, y_plane_size); - - // Fill image buffers with random data. - for (i = b; i < (yh + b); ++i) { - for (j = b; j < (yw + b); ++j) { - orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff; - } - } - - // Fill destination buffers with random data. - for (i = 0; i < y_plane_size; ++i) { - uint8_t random_number = fastrand() & 0x7f; - dst_c[i] = random_number; - dst_opt[i] = dst_c[i]; - } - - int y_off = b * (yw + b * 2) + b; - - int y_st = yw + b * 2; - int stride = 8; - - // Disable all optimizations. - MaskCpuFlags(disable_cpu_flags_); - for (j = 0; j < benchmark_iterations_; j++) { - CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh); - } - - // Enable optimizations. 
- MaskCpuFlags(benchmark_cpu_info_); - for (j = 0; j < benchmark_iterations_; j++) { - CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh); - } - - for (i = 0; i < y_plane_size; ++i) { - if (dst_c[i] != dst_opt[i]) { - ++err; - } - } - - free_aligned_buffer_page_end(orig_y); - free_aligned_buffer_page_end(dst_c); - free_aligned_buffer_page_end(dst_opt); - - EXPECT_EQ(0, err); -} - -TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { - int i; - int y_plane_size = benchmark_width_ * benchmark_height_; - align_buffer_page_end(orig_y, y_plane_size); - align_buffer_page_end(dst_c, y_plane_size); - align_buffer_page_end(dst_opt, y_plane_size); - - MemRandomize(orig_y, y_plane_size); + MemRandomize(orig_y + off, y_plane_size); memset(dst_c, 1, y_plane_size); memset(dst_opt, 2, y_plane_size); // Disable all optimizations. - MaskCpuFlags(disable_cpu_flags_); - for (i = 0; i < benchmark_iterations_; i++) { - CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_, - benchmark_width_, benchmark_height_); + MaskCpuFlags(disable_cpu_flags); + for (int i = 0; i < benchmark_iterations; i++) { + CopyPlane(orig_y + off, width, dst_c, width, width, height * invert); } // Enable optimizations. 
- MaskCpuFlags(benchmark_cpu_info_); - for (i = 0; i < benchmark_iterations_; i++) { - CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_, - benchmark_width_, benchmark_height_); + MaskCpuFlags(benchmark_cpu_info); + for (int i = 0; i < benchmark_iterations; i++) { + CopyPlane(orig_y + off, width, dst_opt, width, width, height * invert); } - for (i = 0; i < y_plane_size; ++i) { - EXPECT_EQ(dst_c[i], dst_opt[i]); + int max_diff = 0; + for (int i = 0; i < y_plane_size; ++i) { + int abs_diff = abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i])); + if (abs_diff > max_diff) { + max_diff = abs_diff; + } } free_aligned_buffer_page_end(orig_y); free_aligned_buffer_page_end(dst_c); free_aligned_buffer_page_end(dst_opt); + + return max_diff; +} + +TEST_F(LibYUVPlanarTest, CopyPlane_Any) { + int max_diff = TestCopyPlane(benchmark_width_ + 1, benchmark_height_, + benchmark_iterations_, disable_cpu_flags_, + benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 0); +} + +TEST_F(LibYUVPlanarTest, CopyPlane_Unaligned) { + int max_diff = + TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 1); + EXPECT_LE(max_diff, 0); +} + +TEST_F(LibYUVPlanarTest, CopyPlane_Invert) { + int max_diff = + TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, -1, 0); + EXPECT_LE(max_diff, 0); +} + +TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { + int max_diff = + TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, + disable_cpu_flags_, benchmark_cpu_info_, +1, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { diff --git a/util/cpuid.c b/util/cpuid.c index 766c43c19..ab05cc1c1 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -192,6 +192,7 @@ int main(int argc, const char* argv[]) { printf("Has AVX 0x%x\n", has_avx); printf("Has AVX2 0x%x\n", has_avx2); printf("Has ERMS 0x%x\n", has_erms); + printf("Has FSMR 0x%x\n", 
has_fsmr); printf("Has FMA3 0x%x\n", has_fma3); printf("Has F16C 0x%x\n", has_f16c); printf("Has AVX512BW 0x%x\n", has_avx512bw);