mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
HalfFloat AVX2 unpack bug fix.
The operands of the AVX2 unpack instructions (vpunpckhwd/vpunpcklwd) were in reversed order, causing incorrect results on AVX2 hardware. TEST=/usr/local/google/home/fbarchard/intelsde/sde -skx -- out/Release/libyuv_unittest --gtest_filter=*Half* BUG=libyuv:560 R=wangcheng@google.com Review URL: https://codereview.chromium.org/2438893002
This commit is contained in:
parent
f553db2d30
commit
550cf829fb
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1628
|
||||
Version: 1629
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1628
|
||||
#define LIBYUV_VERSION 1629
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -5350,17 +5350,17 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
|
||||
// 16 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts
|
||||
"vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"vpunpckhwd %%ymm2,%%ymm5,%%ymm3 \n"
|
||||
"vpunpcklwd %%ymm2,%%ymm5,%%ymm2 \n"
|
||||
"vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates
|
||||
"vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n"
|
||||
"vcvtdq2ps %%ymm3,%%ymm3 \n"
|
||||
"vcvtdq2ps %%ymm2,%%ymm2 \n"
|
||||
"vmulps %%ymm3,%%ymm4,%%ymm3 \n"
|
||||
"vmulps %%ymm2,%%ymm4,%%ymm2 \n"
|
||||
"vpsrld $0xd,%%ymm3,%%ymm3 \n"
|
||||
"vpsrld $0xd,%%ymm2,%%ymm2 \n"
|
||||
"vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates
|
||||
"vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates
|
||||
"vmovdqu %%ymm2," MEMACCESS(1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
@ -5384,8 +5384,8 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
|
||||
// 16 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts -> 8 ints
|
||||
"vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" // 8 more
|
||||
"vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints
|
||||
"vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"vcvtdq2ps %%ymm2,%%ymm2 \n"
|
||||
"vcvtdq2ps %%ymm3,%%ymm3 \n"
|
||||
|
||||
@ -2099,8 +2099,8 @@ int TestHalfFloatPlane(int benchmark_width, int benchmark_height,
|
||||
MaskCpuFlags(disable_cpu_flags);
|
||||
double c_time = get_time();
|
||||
for (j = 0; j < benchmark_iterations; j++) {
|
||||
HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
|
||||
(uint16*)dst_c, benchmark_width * 2,
|
||||
HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
|
||||
reinterpret_cast<uint16*>(dst_c), benchmark_width * 2,
|
||||
scale, benchmark_width, benchmark_height);
|
||||
}
|
||||
c_time = (get_time() - c_time) / benchmark_iterations;
|
||||
@ -2109,8 +2109,8 @@ int TestHalfFloatPlane(int benchmark_width, int benchmark_height,
|
||||
MaskCpuFlags(benchmark_cpu_info);
|
||||
double opt_time = get_time();
|
||||
for (j = 0; j < benchmark_iterations; j++) {
|
||||
HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
|
||||
(uint16*)dst_opt, benchmark_width * 2,
|
||||
HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2,
|
||||
reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2,
|
||||
scale, benchmark_width, benchmark_height);
|
||||
}
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user