mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Fix shift amount for SSSE3 assembly for I012 format conversions
Bug: libyuv:938, libyuv:942
Change-Id: I6fb6e7e17fa941785e398bc630f465baf72fcabd
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3906091
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
8fc02134c8
commit
f9fda6e7d8
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1843
|
Version: 1844
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1843
|
#define LIBYUV_VERSION 1844
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -4052,30 +4052,6 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// SSSE3 implemented in row_gcc.cc row_win.cc for 32 bit
|
|
||||||
// For row_win Visual C (not clangcl)
|
|
||||||
#if defined(HAS_I422TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
|
|
||||||
void I422ToRGB24Row_SSSE3(const uint8_t* src_y,
|
|
||||||
const uint8_t* src_u,
|
|
||||||
const uint8_t* src_v,
|
|
||||||
uint8_t* dst_rgb24,
|
|
||||||
const struct YuvConstants* yuvconstants,
|
|
||||||
int width) {
|
|
||||||
// Row buffer for intermediate ARGB pixels.
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
|
||||||
while (width > 0) {
|
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
|
||||||
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
|
|
||||||
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
|
|
||||||
src_y += twidth;
|
|
||||||
src_u += twidth / 2;
|
|
||||||
src_v += twidth / 2;
|
|
||||||
dst_rgb24 += twidth * 3;
|
|
||||||
width -= twidth;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(HAS_I422TORGB24ROW_AVX2)
|
#if defined(HAS_I422TORGB24ROW_AVX2)
|
||||||
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||||
const uint8_t* src_u,
|
const uint8_t* src_u,
|
||||||
@ -4102,29 +4078,6 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// For row_win Visual C (not clangcl)
|
|
||||||
#if defined(HAS_I444TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
|
|
||||||
void I444ToRGB24Row_SSSE3(const uint8_t* src_y,
|
|
||||||
const uint8_t* src_u,
|
|
||||||
const uint8_t* src_v,
|
|
||||||
uint8_t* dst_rgb24,
|
|
||||||
const struct YuvConstants* yuvconstants,
|
|
||||||
int width) {
|
|
||||||
// Row buffer for intermediate ARGB pixels.
|
|
||||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
|
||||||
while (width > 0) {
|
|
||||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
|
||||||
I444ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
|
|
||||||
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
|
|
||||||
src_y += twidth;
|
|
||||||
src_u += twidth;
|
|
||||||
src_v += twidth;
|
|
||||||
dst_rgb24 += twidth * 3;
|
|
||||||
width -= twidth;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(HAS_I444TORGB24ROW_AVX2)
|
#if defined(HAS_I444TORGB24ROW_AVX2)
|
||||||
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||||
const uint8_t* src_u,
|
const uint8_t* src_u,
|
||||||
|
|||||||
@ -2336,9 +2336,6 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"lea 0x8(%[y_buf]),%[y_buf] \n"
|
"lea 0x8(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
// Read 4 UV from 422 10 bit, upsample to 8 UV
|
// Read 4 UV from 422 10 bit, upsample to 8 UV
|
||||||
// TODO(fbarchard): Consider shufb to replace pack/unpack
|
|
||||||
// TODO(fbarchard): Consider pmulhuw to replace psraw
|
|
||||||
// TODO(fbarchard): Consider pmullw to replace psllw and allow different bits.
|
|
||||||
#define READYUV210 \
|
#define READYUV210 \
|
||||||
"movq (%[u_buf]),%%xmm3 \n" \
|
"movq (%[u_buf]),%%xmm3 \n" \
|
||||||
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
|
"movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \
|
||||||
@ -2350,7 +2347,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||||
"movdqa %%xmm4,%%xmm2 \n" \
|
"movdqa %%xmm4,%%xmm2 \n" \
|
||||||
"psllw $6,%%xmm4 \n" \
|
"psllw $6,%%xmm4 \n" \
|
||||||
"psraw $4,%%xmm2 \n" \
|
"psrlw $4,%%xmm2 \n" \
|
||||||
"paddw %%xmm2,%%xmm4 \n" \
|
"paddw %%xmm2,%%xmm4 \n" \
|
||||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -2365,7 +2362,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||||
"movdqa %%xmm4,%%xmm2 \n" \
|
"movdqa %%xmm4,%%xmm2 \n" \
|
||||||
"psllw $6,%%xmm4 \n" \
|
"psllw $6,%%xmm4 \n" \
|
||||||
"psraw $4,%%xmm2 \n" \
|
"psrlw $4,%%xmm2 \n" \
|
||||||
"paddw %%xmm2,%%xmm4 \n" \
|
"paddw %%xmm2,%%xmm4 \n" \
|
||||||
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
||||||
"movdqu (%[a_buf]),%%xmm5 \n" \
|
"movdqu (%[a_buf]),%%xmm5 \n" \
|
||||||
@ -2387,7 +2384,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||||
"movdqa %%xmm4,%%xmm2 \n" \
|
"movdqa %%xmm4,%%xmm2 \n" \
|
||||||
"psllw $6,%%xmm4 \n" \
|
"psllw $6,%%xmm4 \n" \
|
||||||
"psraw $4,%%xmm2 \n" \
|
"psrlw $4,%%xmm2 \n" \
|
||||||
"paddw %%xmm2,%%xmm4 \n" \
|
"paddw %%xmm2,%%xmm4 \n" \
|
||||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -2405,7 +2402,7 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||||
"movdqa %%xmm4,%%xmm2 \n" \
|
"movdqa %%xmm4,%%xmm2 \n" \
|
||||||
"psllw $6,%%xmm4 \n" \
|
"psllw $6,%%xmm4 \n" \
|
||||||
"psraw $4,%%xmm2 \n" \
|
"psrlw $4,%%xmm2 \n" \
|
||||||
"paddw %%xmm2,%%xmm4 \n" \
|
"paddw %%xmm2,%%xmm4 \n" \
|
||||||
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
||||||
"movdqu (%[a_buf]),%%xmm5 \n" \
|
"movdqu (%[a_buf]),%%xmm5 \n" \
|
||||||
@ -2424,8 +2421,8 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
|||||||
"punpcklwd %%xmm3,%%xmm3 \n" \
|
"punpcklwd %%xmm3,%%xmm3 \n" \
|
||||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||||
"movdqa %%xmm4,%%xmm2 \n" \
|
"movdqa %%xmm4,%%xmm2 \n" \
|
||||||
"psllw $6,%%xmm4 \n" \
|
"psllw $4,%%xmm4 \n" \
|
||||||
"psraw $4,%%xmm2 \n" \
|
"psrlw $8,%%xmm2 \n" \
|
||||||
"paddw %%xmm2,%%xmm4 \n" \
|
"paddw %%xmm2,%%xmm4 \n" \
|
||||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -3448,7 +3445,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
|
||||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -3465,7 +3462,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
|
||||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||||
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
||||||
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
||||||
@ -3485,7 +3482,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
||||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
|
||||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -3502,7 +3499,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpsllw $4,%%ymm4,%%ymm2 \n" \
|
"vpsllw $4,%%ymm4,%%ymm2 \n" \
|
||||||
"vpsraw $8,%%ymm4,%%ymm4 \n" \
|
"vpsrlw $8,%%ymm4,%%ymm4 \n" \
|
||||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||||
|
|
||||||
@ -3518,7 +3515,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
||||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
"vpsrlw $4,%%ymm4,%%ymm4 \n" \
|
||||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||||
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
||||||
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user