mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
ShortToHalfFloat_AVX2 function
BUG=libyuv:560 TEST=local compile for windows R=wangcheng@google.com Review URL: https://codereview.chromium.org/2364293002 .
This commit is contained in:
parent
bcd823805c
commit
6732bcbde9
@ -252,6 +252,7 @@ extern "C" {
|
||||
#define HAS_ARGBTORGB565ROW_AVX2
|
||||
#define HAS_J400TOARGBROW_AVX2
|
||||
#define HAS_RGB565TOARGBROW_AVX2
|
||||
#define HAS_SHORTTOF16ROW_AVX2
|
||||
#endif
|
||||
|
||||
// The following are also available on x64 Visual C.
|
||||
@ -1932,6 +1933,10 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
|
||||
uint8* dst_argb, const float* poly,
|
||||
int width);
|
||||
|
||||
// Scale and convert to half float.
|
||||
void ShortToF16Row_C(const uint16* src, int16* dst, float scale, int width);
|
||||
void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width);
|
||||
|
||||
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
|
||||
const uint8* luma, uint32 lumacoeff);
|
||||
void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
||||
|
||||
@ -6095,6 +6095,36 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
|
||||
}
|
||||
#endif // HAS_ARGBPOLYNOMIALROW_AVX2
|
||||
|
||||
// Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
|
||||
// adjusts the sample range to 0 to 1 using a float multiply.
|
||||
// e.g. 9 bit scale is 1.0f / 512.0f
|
||||
// e.g. 10 bit scale is 1.0f / 1024.0f
|
||||
// Guard must match the feature macro that is actually defined for this
// function (HAS_SHORTTOF16ROW_AVX2); otherwise the body is never compiled.
#ifdef HAS_SHORTTOF16ROW_AVX2
// Converts a row of 16-bit unsigned samples to half floats.
//
// Each sample is zero-extended to 32 bits, converted to float, multiplied by
// |scale| and then narrowed to a 16-bit half float.
//
// Arguments are read directly from the stack (naked function, no prologue):
//   [esp + 4]  src   - source samples, 16 bytes read per iteration
//   [esp + 8]  dst   - destination half floats, 16 bytes written per iteration
//   [esp + 12] scale - multiplier applied to every sample
//   [esp + 16] width - number of samples to convert
//
// The loop handles 8 samples per pass and tests the count only after the
// pass, so at least 8 samples are always processed and |width| is effectively
// rounded up to a multiple of 8.
// NOTE(review): callers presumably pass a positive multiple of 8 - confirm.
//
// Uses eax, ecx, edx, ymm0 and ymm4 as scratch.
__declspec(naked)
void ShortToF16Row_AVX2(const uint16* src, int16* dst, float scale, int width) {
  __asm {
    mov        eax, [esp + 4]   /* src */
    mov        edx, [esp + 8]   /* dst */
    vbroadcastss ymm4, [esp + 12]   /* scale, replicated to all 8 lanes */
    mov        ecx, [esp + 16]  /* width */

    // 8 pixel loop.
 convertloop:
    vpmovzxwd  ymm0, xmmword ptr [eax]  // 8 shorts -> 8 ints
    lea        eax, [eax + 16]          // advance src by 8 shorts
    vcvtdq2ps  ymm0, ymm0               // convert 8 ints to floats
    vmulps     ymm0, ymm0, ymm4         // scale to normalized range 0 to 1
    vcvtps2ph  xmm0, ymm0, 0            // convert 8 floats to 8 half floats,
                                        // immediate 0 = round to nearest even
    vmovdqu    [edx], xmm0              // store 8 half floats (unaligned ok)
    lea        edx, [edx + 16]          // advance dst by 8 half floats
    sub        ecx, 8
    jg         convertloop              // loop while samples remain

    vzeroupper  // clear upper ymm halves before returning to non-AVX code
    ret
  }
}
#endif  // HAS_SHORTTOF16ROW_AVX2
|
||||
|
||||
#ifdef HAS_ARGBCOLORTABLEROW_X86
|
||||
// Transform ARGB pixels with color table.
|
||||
__declspec(naked)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user