diff --git a/README.chromium b/README.chromium index 0479b5fde..8eecc5338 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1843 +Version: 1844 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 870c98085..699d13f91 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1843 +#define LIBYUV_VERSION 1844 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row_common.cc b/source/row_common.cc index 20eb48ec0..50be62878 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -4052,30 +4052,6 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y, } #endif -// SSSE3 implemented in row_gcc.cc row_win.cc for 32 bit -// For row_win Visual C (not clangcl) -#if defined(HAS_I422TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__) -void I422ToRGB24Row_SSSE3(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); - ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - #if defined(HAS_I422TORGB24ROW_AVX2) void I422ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, @@ -4102,29 +4078,6 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif -// For row_win Visual C (not clangcl) -#if defined(HAS_I444TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__) -void I444ToRGB24Row_SSSE3(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I444ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); - ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth; - src_v += twidth; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - #if defined(HAS_I444TORGB24ROW_AVX2) void I444ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, diff --git a/source/row_gcc.cc b/source/row_gcc.cc index fa1b54793..5a8a492d1 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2336,9 +2336,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422 10 bit, upsample to 8 UV -// TODO(fbarchard): Consider shufb to replace pack/unpack -// TODO(fbarchard): Consider pmulhuw to replace psraw -// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits. #define READYUV210 \ "movq (%[u_buf]),%%xmm3 \n" \ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ @@ -2350,7 +2347,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu (%[y_buf]),%%xmm4 \n" \ "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ - "psraw $4,%%xmm2 \n" \ + "psrlw $4,%%xmm2 \n" \ "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" @@ -2365,7 +2362,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu (%[y_buf]),%%xmm4 \n" \ "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ - "psraw $4,%%xmm2 \n" \ + "psrlw $4,%%xmm2 \n" \ "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" \ "movdqu (%[a_buf]),%%xmm5 \n" \ @@ -2387,7 +2384,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu (%[y_buf]),%%xmm4 \n" \ "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ - "psraw $4,%%xmm2 \n" \ + "psrlw $4,%%xmm2 \n" \ "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" @@ -2405,7 +2402,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "movdqu (%[y_buf]),%%xmm4 \n" \ "movdqa %%xmm4,%%xmm2 \n" \ "psllw $6,%%xmm4 \n" \ - "psraw $4,%%xmm2 \n" \ + "psrlw $4,%%xmm2 \n" \ "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" \ "movdqu (%[a_buf]),%%xmm5 \n" \ @@ -2424,8 +2421,8 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, "punpcklwd %%xmm3,%%xmm3 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ "movdqa %%xmm4,%%xmm2 \n" \ - "psllw $6,%%xmm4 \n" \ - "psraw $4,%%xmm2 \n" \ + "psllw $4,%%xmm4 \n" \ + "psrlw $8,%%xmm2 \n" \ "paddw %%xmm2,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" @@ -3448,7 +3445,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $6,%%ymm4,%%ymm2 \n" \ - "vpsraw $4,%%ymm4,%%ymm4 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" @@ -3465,7 +3462,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $6,%%ymm4,%%ymm2 \n" \ - "vpsraw $4,%%ymm4,%%ymm4 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" \ "vmovdqu (%[a_buf]),%%ymm5 \n" \ @@ -3485,7 +3482,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $6,%%ymm4,%%ymm2 \n" \ - "vpsraw $4,%%ymm4,%%ymm4 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" @@ -3502,7 +3499,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $4,%%ymm4,%%ymm2 \n" \ - "vpsraw $8,%%ymm4,%%ymm4 \n" \ + "vpsrlw $8,%%ymm4,%%ymm4 \n" \ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" @@ -3518,7 +3515,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, "vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $6,%%ymm4,%%ymm2 \n" \ - "vpsraw $4,%%ymm4,%%ymm4 \n" \ + "vpsrlw $4,%%ymm4,%%ymm4 \n" \ "vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" \ "vmovdqu (%[a_buf]),%%ymm5 \n" \