Fix shift amount for SSSE3 assembly for I012 format conversions

Bug: libyuv:938, libyuv:942
Change-Id: I6fb6e7e17fa941785e398bc630f465baf72fcabd
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3906091
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2022-09-20 15:48:12 -07:00 committed by Frank Barchard
parent 8fc02134c8
commit f9fda6e7d8
4 changed files with 13 additions and 63 deletions

View File

@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1843
Version: 1844
License: BSD
License File: LICENSE

View File

@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1843
#define LIBYUV_VERSION 1844
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@@ -4052,30 +4052,6 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
}
#endif
// SSSE3 implemented in row_gcc.cc row_win.cc for 32 bit
// For row_win Visual C (not clangcl)
#if defined(HAS_I422TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
// Convert an I422 (4:2:2 planar YUV) row to RGB24 using SSSE3, by going
// through an intermediate ARGB scratch row in MAXTWIDTH-pixel chunks.
// src_y/src_u/src_v: planar input; U/V are half horizontal resolution.
// dst_rgb24: packed 3-bytes-per-pixel output.
// yuvconstants: colorspace conversion coefficients.
// width: number of pixels in the row.
void I422ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch buffer holding one chunk of intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = (remaining > MAXTWIDTH) ? MAXTWIDTH : remaining;
    // Stage 1: expand YUV to ARGB into the scratch row.
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    // Stage 2: pack ARGB down to RGB24 at the destination.
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // 4:2:2 chroma planes advance at half rate.
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
    remaining -= chunk;
  }
}
#endif
#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
@@ -4102,29 +4078,6 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
// For row_win Visual C (not clangcl)
#if defined(HAS_I444TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
// Convert an I444 (4:4:4 planar YUV) row to RGB24 using SSSE3, by going
// through an intermediate ARGB scratch row in MAXTWIDTH-pixel chunks.
// src_y/src_u/src_v: planar input; U/V are full resolution (one per pixel).
// dst_rgb24: packed 3-bytes-per-pixel output.
// yuvconstants: colorspace conversion coefficients.
// width: number of pixels in the row.
void I444ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Scratch buffer holding one chunk of intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int remaining = width;
  while (remaining > 0) {
    const int chunk = (remaining > MAXTWIDTH) ? MAXTWIDTH : remaining;
    // Stage 1: expand YUV to ARGB into the scratch row.
    I444ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    // Stage 2: pack ARGB down to RGB24 at the destination.
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    src_y += chunk;
    src_u += chunk;  // 4:4:4 chroma planes advance one sample per pixel.
    src_v += chunk;
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
    remaining -= chunk;
  }
}
#endif
#if defined(HAS_I444TORGB24ROW_AVX2)
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,

View File

@@ -2336,9 +2336,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"lea 0x8(%[y_buf]),%[y_buf] \n"
// Read 4 UV from 422 10 bit, upsample to 8 UV
// TODO(fbarchard): Consider shufb to replace pack/unpack
// TODO(fbarchard): Consider pmulhuw to replace psraw
// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
#define READYUV210 \
"movq (%[u_buf]),%%xmm3 \n" \
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
@@ -2350,7 +2347,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu (%[y_buf]),%%xmm4 \n" \
"movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
"psraw $4,%%xmm2 \n" \
"psrlw $4,%%xmm2 \n" \
"paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
@@ -2365,7 +2362,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu (%[y_buf]),%%xmm4 \n" \
"movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
"psraw $4,%%xmm2 \n" \
"psrlw $4,%%xmm2 \n" \
"paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
@@ -2387,7 +2384,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu (%[y_buf]),%%xmm4 \n" \
"movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
"psraw $4,%%xmm2 \n" \
"psrlw $4,%%xmm2 \n" \
"paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
@@ -2405,7 +2402,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"movdqu (%[y_buf]),%%xmm4 \n" \
"movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
"psraw $4,%%xmm2 \n" \
"psrlw $4,%%xmm2 \n" \
"paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n" \
"movdqu (%[a_buf]),%%xmm5 \n" \
@@ -2424,8 +2421,8 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
"punpcklwd %%xmm3,%%xmm3 \n" \
"movdqu (%[y_buf]),%%xmm4 \n" \
"movdqa %%xmm4,%%xmm2 \n" \
"psllw $6,%%xmm4 \n" \
"psraw $4,%%xmm2 \n" \
"psllw $4,%%xmm4 \n" \
"psrlw $8,%%xmm2 \n" \
"paddw %%xmm2,%%xmm4 \n" \
"lea 0x10(%[y_buf]),%[y_buf] \n"
@@ -3448,7 +3445,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm2 \n" \
"vpsraw $4,%%ymm4,%%ymm4 \n" \
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
@@ -3465,7 +3462,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm2 \n" \
"vpsraw $4,%%ymm4,%%ymm4 \n" \
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \
@@ -3485,7 +3482,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm2 \n" \
"vpsraw $4,%%ymm4,%%ymm4 \n" \
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
@@ -3502,7 +3499,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $4,%%ymm4,%%ymm2 \n" \
"vpsraw $8,%%ymm4,%%ymm4 \n" \
"vpsrlw $8,%%ymm4,%%ymm4 \n" \
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n"
@@ -3518,7 +3515,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
"vmovdqu (%[y_buf]),%%ymm4 \n" \
"vpsllw $6,%%ymm4,%%ymm2 \n" \
"vpsraw $4,%%ymm4,%%ymm4 \n" \
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
"lea 0x20(%[y_buf]),%[y_buf] \n" \
"vmovdqu (%[a_buf]),%%ymm5 \n" \