mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
Add CopyPlane_Unaligned, _Any and _Invert tests/benchmarks
CpuId test - Add AMD_ERMSB detect for ERMS on AMD. Bug: 379457420. Change-Id: I608568556024faf19abe4d0662aeeee553a0a349. Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6032852. Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
1c501a8f3f
commit
307b951229
@ -31,12 +31,12 @@ static const int kCpuHasSVE2 = 0x1000;
|
||||
static const int kCpuHasSME = 0x2000;
|
||||
|
||||
// These flags are only valid on RISCV processors.
|
||||
static const int kCpuHasRISCV = 0x8;
|
||||
static const int kCpuHasRISCV = 0x4;
|
||||
static const int kCpuHasRVV = 0x100;
|
||||
static const int kCpuHasRVVZVFH = 0x200;
|
||||
|
||||
// These flags are only valid on x86 processors.
|
||||
static const int kCpuHasX86 = 0x10;
|
||||
static const int kCpuHasX86 = 0x8;
|
||||
static const int kCpuHasSSE2 = 0x100;
|
||||
static const int kCpuHasSSSE3 = 0x200;
|
||||
static const int kCpuHasSSE41 = 0x400;
|
||||
@ -59,11 +59,11 @@ static const int kCpuHasAVXVNNIINT8 = 0x4000000;
|
||||
static const int kCpuHasAMXINT8 = 0x8000000;
|
||||
|
||||
// These flags are only valid on MIPS processors.
|
||||
static const int kCpuHasMIPS = 0x20;
|
||||
static const int kCpuHasMIPS = 0x10;
|
||||
static const int kCpuHasMSA = 0x100;
|
||||
|
||||
// These flags are only valid on LOONGARCH processors.
|
||||
static const int kCpuHasLOONGARCH = 0x40;
|
||||
static const int kCpuHasLOONGARCH = 0x20;
|
||||
static const int kCpuHasLSX = 0x100;
|
||||
static const int kCpuHasLASX = 0x200;
|
||||
|
||||
|
||||
@ -409,11 +409,13 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
||||
int cpu_info1[4] = {0, 0, 0, 0};
|
||||
int cpu_info7[4] = {0, 0, 0, 0};
|
||||
int cpu_einfo7[4] = {0, 0, 0, 0};
|
||||
int cpu_amdinfo21[4] = {0, 0, 0, 0};
|
||||
CpuId(0, 0, cpu_info0);
|
||||
CpuId(1, 0, cpu_info1);
|
||||
if (cpu_info0[0] >= 7) {
|
||||
CpuId(7, 0, cpu_info7);
|
||||
CpuId(7, 1, cpu_einfo7);
|
||||
CpuId(0x80000021, 0, cpu_amdinfo21);
|
||||
}
|
||||
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||
@ -431,6 +433,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
|
||||
((cpu_einfo7[0] & 0x00000010) ? kCpuHasAVXVNNI : 0) |
|
||||
((cpu_einfo7[3] & 0x00000010) ? kCpuHasAVXVNNIINT8 : 0);
|
||||
|
||||
cpu_info |= ((cpu_amdinfo21[0] & 0x00008000) ? kCpuHasERMS : 0);
|
||||
|
||||
// Detect AVX512bw
|
||||
if ((GetXCR0() & 0xe0) == 0xe0) {
|
||||
cpu_info |= ((cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0) |
|
||||
|
||||
@ -1551,98 +1551,75 @@ TEST_F(LibYUVPlanarTest, TestAffine) {
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
|
||||
int err = 0;
|
||||
int yw = benchmark_width_;
|
||||
int yh = benchmark_height_;
|
||||
int b = 12;
|
||||
int i, j;
|
||||
|
||||
int y_plane_size = (yw + b * 2) * (yh + b * 2);
|
||||
align_buffer_page_end(orig_y, y_plane_size);
|
||||
static int TestCopyPlane(int width,
|
||||
int height,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info,
|
||||
int invert,
|
||||
int off) {
|
||||
int y_plane_size = width * height;
|
||||
align_buffer_page_end(orig_y, y_plane_size + off);
|
||||
align_buffer_page_end(dst_c, y_plane_size);
|
||||
align_buffer_page_end(dst_opt, y_plane_size);
|
||||
|
||||
memset(orig_y, 0, y_plane_size);
|
||||
memset(dst_c, 0, y_plane_size);
|
||||
memset(dst_opt, 0, y_plane_size);
|
||||
|
||||
// Fill image buffers with random data.
|
||||
for (i = b; i < (yh + b); ++i) {
|
||||
for (j = b; j < (yw + b); ++j) {
|
||||
orig_y[i * (yw + b * 2) + j] = fastrand() & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
// Fill destination buffers with random data.
|
||||
for (i = 0; i < y_plane_size; ++i) {
|
||||
uint8_t random_number = fastrand() & 0x7f;
|
||||
dst_c[i] = random_number;
|
||||
dst_opt[i] = dst_c[i];
|
||||
}
|
||||
|
||||
int y_off = b * (yw + b * 2) + b;
|
||||
|
||||
int y_st = yw + b * 2;
|
||||
int stride = 8;
|
||||
|
||||
// Disable all optimizations.
|
||||
MaskCpuFlags(disable_cpu_flags_);
|
||||
for (j = 0; j < benchmark_iterations_; j++) {
|
||||
CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
|
||||
}
|
||||
|
||||
// Enable optimizations.
|
||||
MaskCpuFlags(benchmark_cpu_info_);
|
||||
for (j = 0; j < benchmark_iterations_; j++) {
|
||||
CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
|
||||
}
|
||||
|
||||
for (i = 0; i < y_plane_size; ++i) {
|
||||
if (dst_c[i] != dst_opt[i]) {
|
||||
++err;
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(orig_y);
|
||||
free_aligned_buffer_page_end(dst_c);
|
||||
free_aligned_buffer_page_end(dst_opt);
|
||||
|
||||
EXPECT_EQ(0, err);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
|
||||
int i;
|
||||
int y_plane_size = benchmark_width_ * benchmark_height_;
|
||||
align_buffer_page_end(orig_y, y_plane_size);
|
||||
align_buffer_page_end(dst_c, y_plane_size);
|
||||
align_buffer_page_end(dst_opt, y_plane_size);
|
||||
|
||||
MemRandomize(orig_y, y_plane_size);
|
||||
MemRandomize(orig_y + off, y_plane_size);
|
||||
memset(dst_c, 1, y_plane_size);
|
||||
memset(dst_opt, 2, y_plane_size);
|
||||
|
||||
// Disable all optimizations.
|
||||
MaskCpuFlags(disable_cpu_flags_);
|
||||
for (i = 0; i < benchmark_iterations_; i++) {
|
||||
CopyPlane(orig_y, benchmark_width_, dst_c, benchmark_width_,
|
||||
benchmark_width_, benchmark_height_);
|
||||
MaskCpuFlags(disable_cpu_flags);
|
||||
for (int i = 0; i < benchmark_iterations; i++) {
|
||||
CopyPlane(orig_y + off, width, dst_c, width, width, height * invert);
|
||||
}
|
||||
|
||||
// Enable optimizations.
|
||||
MaskCpuFlags(benchmark_cpu_info_);
|
||||
for (i = 0; i < benchmark_iterations_; i++) {
|
||||
CopyPlane(orig_y, benchmark_width_, dst_opt, benchmark_width_,
|
||||
benchmark_width_, benchmark_height_);
|
||||
MaskCpuFlags(benchmark_cpu_info);
|
||||
for (int i = 0; i < benchmark_iterations; i++) {
|
||||
CopyPlane(orig_y + off, width, dst_opt, width, width, height * invert);
|
||||
}
|
||||
|
||||
for (i = 0; i < y_plane_size; ++i) {
|
||||
EXPECT_EQ(dst_c[i], dst_opt[i]);
|
||||
int max_diff = 0;
|
||||
for (int i = 0; i < y_plane_size; ++i) {
|
||||
int abs_diff = abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i]));
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(orig_y);
|
||||
free_aligned_buffer_page_end(dst_c);
|
||||
free_aligned_buffer_page_end(dst_opt);
|
||||
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// Calls TestCopyPlane with a width of benchmark_width_ + 1, a height
// multiplier of +1 and a source offset of 0, then requires the returned
// maximum per-byte difference between the two destination buffers to be 0.
// NOTE(review): the "+ 1" is presumably there to get a width that is not a
// multiple of the SIMD step so the "Any" row path is used - confirm.
TEST_F(LibYUVPlanarTest, CopyPlane_Any) {
|
||||
int max_diff = TestCopyPlane(benchmark_width_ + 1, benchmark_height_,
|
||||
benchmark_iterations_, disable_cpu_flags_,
|
||||
benchmark_cpu_info_, +1, 0);
|
||||
EXPECT_LE(max_diff, 0);
|
||||
}
|
||||
|
||||
// Calls TestCopyPlane with the benchmark width/height, a height multiplier of
// +1 and a source offset of 1, so CopyPlane reads from orig_y + 1. Requires
// the returned maximum per-byte difference between the two destination
// buffers to be 0.
TEST_F(LibYUVPlanarTest, CopyPlane_Unaligned) {
|
||||
int max_diff =
|
||||
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
|
||||
EXPECT_LE(max_diff, 0);
|
||||
}
|
||||
|
||||
// Calls TestCopyPlane with the benchmark width/height, a height multiplier of
// -1 and a source offset of 0, so CopyPlane receives a negative height.
// Requires the returned maximum per-byte difference between the two
// destination buffers to be 0.
// NOTE(review): presumably CopyPlane treats a negative height as a request to
// copy the image vertically flipped - confirm against CopyPlane.
TEST_F(LibYUVPlanarTest, CopyPlane_Invert) {
|
||||
int max_diff =
|
||||
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
|
||||
EXPECT_LE(max_diff, 0);
|
||||
}
|
||||
|
||||
// Baseline case: calls TestCopyPlane with the benchmark width/height, a
// height multiplier of +1 and a source offset of 0. Requires the returned
// maximum per-byte difference between the buffer written with
// disable_cpu_flags_ masked and the one written with benchmark_cpu_info_
// masked to be 0.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
|
||||
int max_diff =
|
||||
TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
|
||||
EXPECT_LE(max_diff, 0);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
|
||||
|
||||
@ -192,6 +192,7 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Has AVX 0x%x\n", has_avx);
|
||||
printf("Has AVX2 0x%x\n", has_avx2);
|
||||
printf("Has ERMS 0x%x\n", has_erms);
|
||||
printf("Has FSMR 0x%x\n", has_fsmr);
|
||||
printf("Has FMA3 0x%x\n", has_fma3);
|
||||
printf("Has F16C 0x%x\n", has_f16c);
|
||||
printf("Has AVX512BW 0x%x\n", has_avx512bw);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user