mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
10/12-bit YUV: replicate upper bits into low bits before converting to RGB
- Shift the high bits of 10-bit and 12-bit values into the lower bits.
Bug: libyuv:941, libyuv:942
Change-Id: I14381dbf226ef27dcce06893ea88860835639baa
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3906085
Reviewed-by: Mirko Bonadei <mbonadei@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
e4b1ddd8fe
commit
8fc02134c8
@ -1723,7 +1723,7 @@ static __inline void YuvPixel10_16(uint16_t y,
|
||||
int* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
LOAD_YUV_CONSTANTS;
|
||||
uint32_t y32 = y << 6;
|
||||
uint32_t y32 = (y << 6) | (y >> 4);
|
||||
u = clamp255(u >> 2);
|
||||
v = clamp255(v >> 2);
|
||||
CALC_RGB16;
|
||||
@ -1742,7 +1742,7 @@ static __inline void YuvPixel12_16(int16_t y,
|
||||
int* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
LOAD_YUV_CONSTANTS;
|
||||
uint32_t y32 = y << 4;
|
||||
uint32_t y32 = (y << 4) | (y >> 8);
|
||||
u = clamp255(u >> 4);
|
||||
v = clamp255(v >> 4);
|
||||
CALC_RGB16;
|
||||
@ -4052,6 +4052,30 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
// SSSE3 implemented in row_gcc.cc row_win.cc for 32 bit
|
||||
// For row_win Visual C (not clangcl)
|
||||
#if defined(HAS_I422TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
|
||||
void I422ToRGB24Row_SSSE3(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
|
||||
src_y += twidth;
|
||||
src_u += twidth / 2;
|
||||
src_v += twidth / 2;
|
||||
dst_rgb24 += twidth * 3;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I422TORGB24ROW_AVX2)
|
||||
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
@ -4078,6 +4102,29 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
// For row_win Visual C (not clangcl)
|
||||
#if defined(HAS_I444TORGB24ROW_SSSE3) && defined(_M_X64) && !defined(__clang__)
|
||||
void I444ToRGB24Row_SSSE3(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
// Row buffer for intermediate ARGB pixels.
|
||||
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
|
||||
while (width > 0) {
|
||||
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
|
||||
I444ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
|
||||
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
|
||||
src_y += twidth;
|
||||
src_u += twidth;
|
||||
src_v += twidth;
|
||||
dst_rgb24 += twidth * 3;
|
||||
width -= twidth;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_I444TORGB24ROW_AVX2)
|
||||
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
|
||||
@ -2348,7 +2348,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
"packuswb %%xmm3,%%xmm3 \n" \
|
||||
"punpcklwd %%xmm3,%%xmm3 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"movdqa %%xmm4,%%xmm2 \n" \
|
||||
"psllw $6,%%xmm4 \n" \
|
||||
"psraw $4,%%xmm2 \n" \
|
||||
"paddw %%xmm2,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
#define READYUVA210 \
|
||||
@ -2360,7 +2363,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
"packuswb %%xmm3,%%xmm3 \n" \
|
||||
"punpcklwd %%xmm3,%%xmm3 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"movdqa %%xmm4,%%xmm2 \n" \
|
||||
"psllw $6,%%xmm4 \n" \
|
||||
"psraw $4,%%xmm2 \n" \
|
||||
"paddw %%xmm2,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
||||
"movdqu (%[a_buf]),%%xmm5 \n" \
|
||||
"psraw $2,%%xmm5 \n" \
|
||||
@ -2379,7 +2385,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
"punpckhwd %%xmm2,%%xmm1 \n" \
|
||||
"packuswb %%xmm1,%%xmm3 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"movdqa %%xmm4,%%xmm2 \n" \
|
||||
"psllw $6,%%xmm4 \n" \
|
||||
"psraw $4,%%xmm2 \n" \
|
||||
"paddw %%xmm2,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from 444 10 bit. With 8 Alpha.
|
||||
@ -2394,7 +2403,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
"punpckhwd %%xmm2,%%xmm1 \n" \
|
||||
"packuswb %%xmm1,%%xmm3 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"psllw $0x6,%%xmm4 \n" \
|
||||
"movdqa %%xmm4,%%xmm2 \n" \
|
||||
"psllw $6,%%xmm4 \n" \
|
||||
"psraw $4,%%xmm2 \n" \
|
||||
"paddw %%xmm2,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n" \
|
||||
"movdqu (%[a_buf]),%%xmm5 \n" \
|
||||
"psraw $2,%%xmm5 \n" \
|
||||
@ -2411,7 +2423,10 @@ void RGBAToUVRow_SSSE3(const uint8_t* src_rgba,
|
||||
"packuswb %%xmm3,%%xmm3 \n" \
|
||||
"punpcklwd %%xmm3,%%xmm3 \n" \
|
||||
"movdqu (%[y_buf]),%%xmm4 \n" \
|
||||
"psllw $0x4,%%xmm4 \n" \
|
||||
"movdqa %%xmm4,%%xmm2 \n" \
|
||||
"psllw $6,%%xmm4 \n" \
|
||||
"psraw $4,%%xmm2 \n" \
|
||||
"paddw %%xmm2,%%xmm4 \n" \
|
||||
"lea 0x10(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
|
||||
@ -3432,7 +3447,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from 210, upsample to 16 UV. With 16 Alpha.
|
||||
@ -3447,7 +3464,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
||||
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
||||
"vpsraw $2,%%ymm5,%%ymm5 \n" \
|
||||
@ -3465,7 +3484,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
|
||||
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 8 UV from 212 12 bit, upsample to 16 UV
|
||||
@ -3480,7 +3501,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpackuswb %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vpunpcklwd %%ymm3,%%ymm3,%%ymm3 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $0x4,%%ymm4,%%ymm4 \n" \
|
||||
"vpsllw $4,%%ymm4,%%ymm2 \n" \
|
||||
"vpsraw $8,%%ymm4,%%ymm4 \n" \
|
||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n"
|
||||
|
||||
// Read 16 UV from 410. With 16 Alpha.
|
||||
@ -3494,7 +3517,9 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
"vpunpcklwd %%ymm2,%%ymm3,%%ymm3 \n" \
|
||||
"vpackuswb %%ymm1,%%ymm3,%%ymm3 \n" \
|
||||
"vmovdqu (%[y_buf]),%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm4 \n" \
|
||||
"vpsllw $6,%%ymm4,%%ymm2 \n" \
|
||||
"vpsraw $4,%%ymm4,%%ymm4 \n" \
|
||||
"vpaddw %%ymm2,%%ymm4,%%ymm4 \n" \
|
||||
"lea 0x20(%[y_buf]),%[y_buf] \n" \
|
||||
"vmovdqu (%[a_buf]),%%ymm5 \n" \
|
||||
"vpsraw $2,%%ymm5,%%ymm5 \n" \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user