mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
H010ToAR30 in 1 step with SSSE3 assembly
Switch YUV conversion macro to output 16 bits per channel. STOREAR30 macro to output AR30. [ RUN ] LibYUVConvertTest.TestH420ToARGB uniques: B 220, G, 220, R 220 [ OK ] LibYUVConvertTest.TestH420ToARGB (0 ms) [ RUN ] LibYUVConvertTest.TestH010ToARGB uniques: B 256, G, 256, R 256 [ OK ] LibYUVConvertTest.TestH010ToARGB (0 ms) [ RUN ] LibYUVConvertTest.TestH010ToAR30 uniques: B 883, G, 883, R 883 [ OK ] LibYUVConvertTest.TestH010ToAR30 (0 ms) Bug: libyuv:751 Test: LibYUVConvertTest.H010ToAR30_Opt Change-Id: I902b718e2c8b68ede69625ccafebc6519d5af70d Reviewed-on: https://chromium-review.googlesource.com/869511 Reviewed-by: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Miguel Casas <mcasas@chromium.org> Reviewed-by: richard winterton <rrwinterton@gmail.com> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
37f9721052
commit
09db0c4ce2
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1689
|
||||
Version: 1690
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -420,6 +420,19 @@ int H010ToARGB(const uint16* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to AR30.
|
||||
LIBYUV_API
|
||||
int I010ToAR30(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to AR30.
|
||||
LIBYUV_API
|
||||
int H010ToAR30(const uint16* src_y,
|
||||
|
||||
@ -256,6 +256,7 @@ extern "C" {
|
||||
#define HAS_CONVERT16TO8ROW_SSSE3
|
||||
#define HAS_CONVERT8TO16ROW_SSE2
|
||||
// I210 is for H010. 2 = 422. I for 601 vs H for 709.
|
||||
#define HAS_I210TOAR30ROW_SSSE3
|
||||
#define HAS_I210TOARGBROW_SSSE3
|
||||
#define HAS_MERGERGBROW_SSSE3
|
||||
#define HAS_SPLITRGBROW_SSSE3
|
||||
@ -1682,6 +1683,12 @@ void I422ToARGBRow_C(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToAR30Row_C(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToARGBRow_C(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
@ -1791,6 +1798,12 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
|
||||
void I210ToAR30Row_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToARGBRow_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
@ -1947,6 +1960,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToAR30Row_Any_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void I210ToARGBRow_Any_SSSE3(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1689
|
||||
#define LIBYUV_VERSION 1690
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -413,7 +413,7 @@ int H422ToABGR(const uint8* src_y,
|
||||
// Convert 10 bit YUV to AR30 with matrix
|
||||
// TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to
|
||||
// multiply 10 bit yuv into high bits to allow any number of bits.
|
||||
static int H010ToAR30Matrix(const uint16* src_y,
|
||||
static int I010ToAR30Matrix(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
@ -425,12 +425,10 @@ static int H010ToAR30Matrix(const uint16* src_y,
|
||||
int width,
|
||||
int height) {
|
||||
int y;
|
||||
void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf,
|
||||
void (*I210ToAR30Row)(const uint16* y_buf, const uint16* u_buf,
|
||||
const uint16* v_buf, uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants, int width) =
|
||||
I210ToARGBRow_C;
|
||||
void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
|
||||
ARGBToAR30Row_C;
|
||||
I210ToAR30Row_C;
|
||||
if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@ -440,46 +438,24 @@ static int H010ToAR30Matrix(const uint16* src_y,
|
||||
dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
|
||||
dst_stride_ar30 = -dst_stride_ar30;
|
||||
}
|
||||
#if defined(HAS_I210TOARGBROW_SSSE3)
|
||||
#if defined(HAS_I210TOAR30ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I210ToARGBRow = I210ToARGBRow_Any_SSSE3;
|
||||
I210ToAR30Row = I210ToAR30Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I210ToARGBRow = I210ToARGBRow_SSSE3;
|
||||
I210ToAR30Row = I210ToAR30Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I210TOARGBROW_AVX2)
|
||||
#if defined(HAS_I210TOAR30ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
I210ToARGBRow = I210ToARGBRow_Any_AVX2;
|
||||
I210ToAR30Row = I210ToAR30Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I210ToARGBRow = I210ToARGBRow_AVX2;
|
||||
I210ToAR30Row = I210ToAR30Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOAR30ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToAR30Row = ARGBToAR30Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
// Row buffers for 8 bit YUV and RGB.
|
||||
align_buffer_64(row_argb, width * 4);
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I210ToARGBRow(src_y, src_u, src_v, row_argb, yuvconstants, width);
|
||||
ARGBToAR30Row(row_argb, dst_ar30, width);
|
||||
I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width);
|
||||
dst_ar30 += dst_stride_ar30;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
@ -487,13 +463,26 @@ static int H010ToAR30Matrix(const uint16* src_y,
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_64(row_argb);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I010 to AR30.
|
||||
LIBYUV_API
|
||||
int I010ToAR30(const uint16* src_y,
|
||||
int src_stride_y,
|
||||
const uint16* src_u,
|
||||
int src_stride_u,
|
||||
const uint16* src_v,
|
||||
int src_stride_v,
|
||||
uint8* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height) {
|
||||
return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_ar30, dst_stride_ar30,
|
||||
&kYuvI601Constants, width, height);
|
||||
}
|
||||
|
||||
// Convert H010 to AR30.
|
||||
LIBYUV_API
|
||||
int H010ToAR30(const uint16* src_y,
|
||||
@ -506,7 +495,7 @@ int H010ToAR30(const uint16* src_y,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height) {
|
||||
return H010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_ar30, dst_stride_ar30,
|
||||
&kYuvH709Constants, width, height);
|
||||
}
|
||||
|
||||
@ -214,6 +214,9 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
|
||||
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
|
||||
}
|
||||
|
||||
// Generates I210ToAR30Row_Any_SSSE3 on top of I210ToAR30Row_SSSE3. The
// converter selects the _Any_ variant unless width is a multiple of 8.
// The ANY31CT arguments are the same as for the I210ToARGB instantiation
// that follows.
#ifdef HAS_I210TOAR30ROW_SSSE3
|
||||
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_I210TOARGBROW_SSSE3
|
||||
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7)
|
||||
#endif
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include <string.h> // For memcpy and memset.
|
||||
#include <stdio.h>
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
@ -31,9 +32,8 @@ static __inline int32 clamp255(int32 v) {
|
||||
return (((255 - (v)) >> 31) | (v)) & 255;
|
||||
}
|
||||
|
||||
static __inline uint32 Clamp(int32 val) {
|
||||
int v = clamp0(val);
|
||||
return (uint32)(clamp255(v));
|
||||
// Branchless upper clamp to 1023.
// (1023 - v) >> 31 is all ones only when v is above 1023 (this relies on the
// right shift of a negative int32 being arithmetic); OR-ing it in and masking
// with 1023 then yields 1023. For 0 <= v <= 1023 the value passes through.
// Negative inputs are not clamped here; Clamp10 applies clamp0 first.
static __inline int32 clamp1023(int32 v) {
  const int32 over_mask = (1023 - v) >> 31;
  return (over_mask | v) & 1023;
}
|
||||
|
||||
static __inline uint32 Abs(int32 v) {
|
||||
@ -49,15 +49,23 @@ static __inline int32 clamp255(int32 v) {
|
||||
return (v > 255) ? 255 : v;
|
||||
}
|
||||
|
||||
static __inline uint32 Clamp(int32 val) {
|
||||
int v = clamp0(val);
|
||||
return (uint32)(clamp255(v));
|
||||
// Upper clamp to 1023; values at or below 1023 are returned unchanged.
static __inline int32 clamp1023(int32 v) {
  if (v > 1023) {
    return 1023;
  }
  return v;
}
|
||||
|
||||
// Absolute value of v, returned as an unsigned 32 bit value.
// The negation is carried out in unsigned arithmetic: negating the most
// negative int32 as a signed value overflows (undefined behaviour), whereas
// the unsigned form is well defined and yields 0x80000000 for that input.
// Every other input gives the same result as a plain signed negate.
static __inline uint32 Abs(int32 v) {
  const uint32 bits = (uint32)v;
  return (v < 0) ? (0u - bits) : bits;
}
|
||||
#endif // USE_BRANCHLESS
|
||||
// Clamp val to the 8 bit range: clamp0 handles the low end, clamp255 the
// high end.
static __inline uint32 Clamp(int32 val) {
  const int floored = clamp0(val);
  const int32 limited = clamp255(floored);
  return (uint32)limited;
}
|
||||
|
||||
// Clamp val to the 10 bit range: clamp0 handles the low end, clamp1023 the
// high end.
static __inline uint32 Clamp10(int32 val) {
  const int floored = clamp0(val);
  const int32 limited = clamp1023(floored);
  return (uint32)limited;
}
|
||||
|
||||
#ifdef LIBYUV_LITTLE_ENDIAN
|
||||
#define WRITEWORD(p, v) *(uint32*)(p) = v
|
||||
@ -1340,6 +1348,56 @@ static __inline void YuvPixel10(uint16 y,
|
||||
*r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
|
||||
}
|
||||
|
||||
// C reference code that mimics the YUV 16 bit assembly.
|
||||
static __inline void YuvPixel16(int16 y,
|
||||
int16 u,
|
||||
int16 v,
|
||||
int* b,
|
||||
int* g,
|
||||
int* r,
|
||||
const struct YuvConstants* yuvconstants) {
|
||||
#if defined(__aarch64__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = -yuvconstants->kUVToRB[1];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
#elif defined(__arm__)
|
||||
int ub = -yuvconstants->kUVToRB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[4];
|
||||
int vr = -yuvconstants->kUVToRB[4];
|
||||
int bb = yuvconstants->kUVBiasBGR[0];
|
||||
int bg = yuvconstants->kUVBiasBGR[1];
|
||||
int br = yuvconstants->kUVBiasBGR[2];
|
||||
int yg = yuvconstants->kYToRgb[0] / 0x0101;
|
||||
#else
|
||||
int ub = yuvconstants->kUVToB[0];
|
||||
int ug = yuvconstants->kUVToG[0];
|
||||
int vg = yuvconstants->kUVToG[1];
|
||||
int vr = yuvconstants->kUVToR[1];
|
||||
int bb = yuvconstants->kUVBiasB[0];
|
||||
int bg = yuvconstants->kUVBiasG[0];
|
||||
int br = yuvconstants->kUVBiasR[0];
|
||||
int yg = yuvconstants->kYToRgb[0];
|
||||
#endif
|
||||
|
||||
uint32 y1 = (uint32)((y << 6) * yg) >> 16;
|
||||
u = clamp255(u >> 2);
|
||||
v = clamp255(v >> 2);
|
||||
*b = (int)(-(u * ub) + y1 + bb);
|
||||
*g = (int)(-(u * ug + v * vg) + y1 + bg);
|
||||
*r = (int)(-(v * vr) + y1 + br);
|
||||
|
||||
if ((int16)(*b & 0xffff) != *b) {
|
||||
printf("%d vs %d bb %d y1 %d\n",(int16)*b, *b, bb, y1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Y contribution to R,G,B. Scale and bias.
|
||||
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
|
||||
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
|
||||
@ -1460,6 +1518,48 @@ void I210ToARGBRow_C(const uint16* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// Pack one pixel into a 32 bit AR30 word and store it at rgb_buf.
// rgb_buf: destination; 4 bytes are written. No alignment is required.
// b, g, r: fixed point channel values as produced by YuvPixel16. Each is
//          shifted right by 4 and then clamped to [0, 1023].
// Word layout: B in bits 0-9, G in bits 10-19, R in bits 20-29, and the top
// two bits set.
static void StoreAR30(uint8* rgb_buf, int b, int g, int r) {
  uint32 ar30;
  b = b >> 4;  // drop the low 4 bits; the clamp below limits to 10 bits.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32)g << 10) | ((uint32)r << 20) | 0xc0000000;
  // rgb_buf is a byte pointer, so it may be unaligned for a 32 bit access.
  // Copy the word instead of storing through a cast uint32 pointer. The byte
  // order written is the host's, exactly as with the direct store.
  memcpy(rgb_buf, &ar30, sizeof(ar30));
}
|
||||
|
||||
// 10 bit YUV to 10 bit AR30
|
||||
void I210ToAR30Row_C(const uint16* src_y,
|
||||
const uint16* src_u,
|
||||
const uint16* src_v,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
int x;
|
||||
int b;
|
||||
int g;
|
||||
int r;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
|
||||
StoreAR30(rgb_buf, b, g, r);
|
||||
YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
|
||||
StoreAR30(rgb_buf + 4, b, g, r);
|
||||
src_y += 2;
|
||||
src_u += 1;
|
||||
src_v += 1;
|
||||
rgb_buf += 8; // Advance 2 pixels.
|
||||
}
|
||||
if (width & 1) {
|
||||
YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
|
||||
StoreAR30(rgb_buf, b, g, r);
|
||||
}
|
||||
}
|
||||
|
||||
void I422AlphaToARGBRow_C(const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
|
||||
@ -1696,7 +1696,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
|
||||
"movdqa 160(%[yuvconstants]),%%xmm13 \n" \
|
||||
"movdqa 192(%[yuvconstants]),%%xmm14 \n"
|
||||
// Convert 8 pixels: 8 UV and 8 Y
|
||||
#define YUVTORGB(yuvconstants) \
|
||||
#define YUVTORGB16(yuvconstants) \
|
||||
"movdqa %%xmm0,%%xmm1 \n" \
|
||||
"movdqa %%xmm0,%%xmm2 \n" \
|
||||
"movdqa %%xmm0,%%xmm3 \n" \
|
||||
@ -1712,20 +1712,14 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
|
||||
"pmulhuw %%xmm14,%%xmm4 \n" \
|
||||
"paddsw %%xmm4,%%xmm0 \n" \
|
||||
"paddsw %%xmm4,%%xmm1 \n" \
|
||||
"paddsw %%xmm4,%%xmm2 \n" \
|
||||
"psraw $0x6,%%xmm0 \n" \
|
||||
"psraw $0x6,%%xmm1 \n" \
|
||||
"psraw $0x6,%%xmm2 \n" \
|
||||
"packuswb %%xmm0,%%xmm0 \n" \
|
||||
"packuswb %%xmm1,%%xmm1 \n" \
|
||||
"packuswb %%xmm2,%%xmm2 \n"
|
||||
"paddsw %%xmm4,%%xmm2 \n"
|
||||
#define YUVTORGB_REGS \
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
|
||||
|
||||
#else
|
||||
#define YUVTORGB_SETUP(yuvconstants)
|
||||
// Convert 8 pixels: 8 UV and 8 Y
|
||||
#define YUVTORGB(yuvconstants) \
|
||||
#define YUVTORGB16(yuvconstants) \
|
||||
"movdqa %%xmm0,%%xmm1 \n" \
|
||||
"movdqa %%xmm0,%%xmm2 \n" \
|
||||
"movdqa %%xmm0,%%xmm3 \n" \
|
||||
@ -1741,15 +1735,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
|
||||
"pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \
|
||||
"paddsw %%xmm4,%%xmm0 \n" \
|
||||
"paddsw %%xmm4,%%xmm1 \n" \
|
||||
"paddsw %%xmm4,%%xmm2 \n" \
|
||||
"paddsw %%xmm4,%%xmm2 \n"
|
||||
#define YUVTORGB_REGS
|
||||
#endif
|
||||
|
||||
// 8 bit output form of the conversion: runs YUVTORGB16, then shifts the
// 16 bit results in xmm0, xmm1 and xmm2 right by 6 and packs each register
// to bytes with unsigned saturation (packuswb).
#define YUVTORGB(yuvconstants) \
|
||||
YUVTORGB16(yuvconstants) \
|
||||
"psraw $0x6,%%xmm0 \n" \
|
||||
"psraw $0x6,%%xmm1 \n" \
|
||||
"psraw $0x6,%%xmm2 \n" \
|
||||
"packuswb %%xmm0,%%xmm0 \n" \
|
||||
"packuswb %%xmm1,%%xmm1 \n" \
|
||||
"packuswb %%xmm2,%%xmm2 \n"
|
||||
#define YUVTORGB_REGS
|
||||
#endif
|
||||
|
||||
// Store 8 ARGB values.
|
||||
#define STOREARGB \
|
||||
@ -1774,6 +1771,32 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
|
||||
"movdqu %%xmm0,0x10(%[dst_rgba]) \n" \
|
||||
"lea 0x20(%[dst_rgba]),%[dst_rgba] \n"
|
||||
|
||||
// Store 8 AR30 values.
// Input: 8 signed 16 bit values in each of xmm0, xmm1 and xmm2. Each is
// shifted right by 4, limited from above by xmm7 and from below by xmm6.
// Packing: xmm0 ends up in bits 0-9 of each 32 bit pixel, xmm1 in bits 10-19
// and xmm2 in bits 20-29 (B, G, R in the layout written by the C StoreAR30).
// The words of xmm5 are interleaved above xmm1 and shifted left by 10 with it.
// I210ToAR30Row_SSSE3 sets xmm5 to 0x0030 per word, xmm6 to zero and xmm7 to
// 1023 before using this macro.
// Writes 32 bytes at dst_ar30 and advances it by 0x20. xmm3 is used as
// scratch.
#define STOREAR30 \
|
||||
"psraw $0x4,%%xmm0 \n" \
|
||||
"psraw $0x4,%%xmm1 \n" \
|
||||
"psraw $0x4,%%xmm2 \n" \
|
||||
"pminsw %%xmm7,%%xmm0 \n" \
|
||||
"pminsw %%xmm7,%%xmm1 \n" \
|
||||
"pminsw %%xmm7,%%xmm2 \n" \
|
||||
"pmaxsw %%xmm6,%%xmm0 \n" \
|
||||
"pmaxsw %%xmm6,%%xmm1 \n" \
|
||||
"pmaxsw %%xmm6,%%xmm2 \n" \
|
||||
"psllw $0x4,%%xmm2 \n" \
|
||||
"movdqa %%xmm0,%%xmm3 \n" \
|
||||
"punpcklwd %%xmm2,%%xmm0 \n" \
|
||||
"punpckhwd %%xmm2,%%xmm3 \n" \
|
||||
"movdqa %%xmm1,%%xmm2 \n" \
|
||||
"punpcklwd %%xmm5,%%xmm1 \n" \
|
||||
"punpckhwd %%xmm5,%%xmm2 \n" \
|
||||
"pslld $0xa,%%xmm1 \n" \
|
||||
"pslld $0xa,%%xmm2 \n" \
|
||||
"por %%xmm1,%%xmm0 \n" \
|
||||
"por %%xmm2,%%xmm3 \n" \
|
||||
"movdqu %%xmm0,(%[dst_ar30]) \n" \
|
||||
"movdqu %%xmm3,0x10(%[dst_ar30]) \n" \
|
||||
"lea 0x20(%[dst_ar30]), %[dst_ar30] \n"
|
||||
|
||||
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
@ -1908,6 +1931,41 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
|
||||
);
|
||||
}
|
||||
|
||||
// 10 bit YUV to AR30
// Converts one row, 8 pixels per loop iteration: width is decremented by 8
// and the loop repeats while the result is greater than zero. The converter
// calls this directly only when width is a multiple of 8; other widths go
// through the _Any_ wrapper.
// Before the loop xmm5, xmm6 and xmm7 are loaded with the constants that
// STOREAR30 uses: alpha bits, lower clamp and upper clamp.
void OMITFP I210ToAR30Row_SSSE3(const uint16* y_buf,
|
||||
const uint16* u_buf,
|
||||
const uint16* v_buf,
|
||||
uint8* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
// v_buf becomes an offset from u_buf; presumably READYUV210 addresses V
|
||||
// relative to U - confirm against the macro.
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n" // all ones
|
||||
"psrlw $14,%%xmm5 \n" // 3 in each word
|
||||
"psllw $4,%%xmm5 \n" // 2 alpha bits
|
||||
"pxor %%xmm6,%%xmm6 \n" // 0 for min
|
||||
"pcmpeqb %%xmm7,%%xmm7 \n" // all ones
|
||||
"psrlw $6,%%xmm7 \n" // 1023 for max
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
READYUV210
|
||||
YUVTORGB16(yuvconstants)
|
||||
STOREAR30
|
||||
"sub $0x8,%[width] \n"
|
||||
"jg 1b \n"
|
||||
: [y_buf]"+r"(y_buf), // %[y_buf]
|
||||
[u_buf]"+r"(u_buf), // %[u_buf]
|
||||
[v_buf]"+r"(v_buf), // %[v_buf]
|
||||
[dst_ar30]"+r"(dst_ar30), // %[dst_ar30]
|
||||
[width]"+rm"(width) // %[width]
|
||||
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
||||
: "memory", "cc", YUVTORGB_REGS
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
}
|
||||
|
||||
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
|
||||
void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user