diff --git a/README.chromium b/README.chromium index 159826180..6e66021d9 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1628 +Version: 1629 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index cebc731db..75406bd7f 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1628 +#define LIBYUV_VERSION 1629 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 03f7f1bdc..8020108d0 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -5350,17 +5350,17 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { // 16 pixel loop. LABELALIGN "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts + "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts "lea " MEMLEA(0x20,0) ",%0 \n" - "vpunpckhwd %%ymm2,%%ymm5,%%ymm3 \n" - "vpunpcklwd %%ymm2,%%ymm5,%%ymm2 \n" + "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates + "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n" "vcvtdq2ps %%ymm3,%%ymm3 \n" "vcvtdq2ps %%ymm2,%%ymm2 \n" "vmulps %%ymm3,%%ymm4,%%ymm3 \n" "vmulps %%ymm2,%%ymm4,%%ymm2 \n" "vpsrld $0xd,%%ymm3,%%ymm3 \n" "vpsrld $0xd,%%ymm2,%%ymm2 \n" - "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates + "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates "vmovdqu %%ymm2," MEMACCESS(1) " \n" "lea " MEMLEA(0x20,1) ",%1 \n" "sub $0x10,%2 \n" @@ -5384,8 +5384,8 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { // 16 pixel loop. 
LABELALIGN "1: \n" - "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts -> 8 ints - "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" // 8 more + "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints + "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" "lea " MEMLEA(0x20,0) ",%0 \n" "vcvtdq2ps %%ymm2,%%ymm2 \n" "vcvtdq2ps %%ymm3,%%ymm3 \n" diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index c552c4a59..a2eb1faac 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -2099,8 +2099,8 @@ int TestHalfFloatPlane(int benchmark_width, int benchmark_height, MaskCpuFlags(disable_cpu_flags); double c_time = get_time(); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane((uint16*)orig_y, benchmark_width * 2, - (uint16*)dst_c, benchmark_width * 2, + HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16*>(dst_c), benchmark_width * 2, scale, benchmark_width, benchmark_height); } c_time = (get_time() - c_time) / benchmark_iterations; @@ -2109,8 +2109,8 @@ int TestHalfFloatPlane(int benchmark_width, int benchmark_height, MaskCpuFlags(benchmark_cpu_info); double opt_time = get_time(); for (j = 0; j < benchmark_iterations; j++) { - HalfFloatPlane((uint16*)orig_y, benchmark_width * 2, - (uint16*)dst_opt, benchmark_width * 2, + HalfFloatPlane(reinterpret_cast<uint16*>(orig_y), benchmark_width * 2, + reinterpret_cast<uint16*>(dst_opt), benchmark_width * 2, scale, benchmark_width, benchmark_height); } opt_time = (get_time() - opt_time) / benchmark_iterations;