mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-08 01:36:47 +08:00
InterpolateRow, which is used for scaling, now handles unaligned memory. Remove HalfRow, which is no longer used.
BUG=367 TESTED=unittest on I422ToI420 R=harryjin@google.com Review URL: https://webrtc-codereview.appspot.com/28639004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1107 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0c603fbca3
commit
d83f63a3b4
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1105
|
||||
Version: 1106
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -124,7 +124,6 @@ extern "C" {
|
||||
#define HAS_COPYROW_ERMS
|
||||
#define HAS_COPYROW_SSE2
|
||||
#define HAS_COPYROW_X86
|
||||
#define HAS_HALFROW_SSE2
|
||||
#define HAS_I400TOARGBROW_SSE2
|
||||
#define HAS_I411TOARGBROW_SSSE3
|
||||
#define HAS_I422TOARGB1555ROW_SSSE3
|
||||
@ -212,7 +211,6 @@ extern "C" {
|
||||
#define HAS_ARGBTOUVROW_AVX2
|
||||
#define HAS_ARGBTOYJROW_AVX2
|
||||
#define HAS_ARGBTOYROW_AVX2
|
||||
#define HAS_HALFROW_AVX2
|
||||
#define HAS_I422TOARGBROW_AVX2
|
||||
#define HAS_INTERPOLATEROW_AVX2
|
||||
#define HAS_MERGEUVROW_AVX2
|
||||
@ -300,7 +298,6 @@ extern "C" {
|
||||
#define HAS_UYVYTOUV422ROW_NEON
|
||||
#define HAS_YUY2TOUVROW_NEON
|
||||
#define HAS_UYVYTOUVROW_NEON
|
||||
#define HAS_HALFROW_NEON
|
||||
#define HAS_ARGBTOBAYERROW_NEON
|
||||
#define HAS_ARGBTOBAYERGGROW_NEON
|
||||
#define HAS_ARGBSHUFFLEROW_NEON
|
||||
@ -378,7 +375,6 @@ extern "C" {
|
||||
#define HAS_BGRATOUVROW_NEON
|
||||
#define HAS_BGRATOYROW_NEON
|
||||
#define HAS_COPYROW_NEON
|
||||
#define HAS_HALFROW_NEON
|
||||
#define HAS_I400TOARGBROW_NEON
|
||||
#define HAS_I411TOARGBROW_NEON
|
||||
#define HAS_I422TOABGRROW_NEON
|
||||
@ -1577,18 +1573,6 @@ void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
|
||||
uint8* dst_u, uint8* dst_v, int pix);
|
||||
|
||||
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix);
|
||||
void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix);
|
||||
void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix);
|
||||
void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix);
|
||||
|
||||
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
|
||||
uint16* dst_uv, int pix);
|
||||
|
||||
void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix);
|
||||
void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1105
|
||||
#define LIBYUV_VERSION 1106
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -1885,17 +1885,17 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
|
||||
}
|
||||
}
|
||||
|
||||
// Blend 2 rows into 1 for conversions such as I422ToI420.
|
||||
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
// Blend 2 rows into 1.
|
||||
static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
int x;
|
||||
for (x = 0; x < pix; ++x) {
|
||||
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
|
||||
uint16* dst_uv, int pix) {
|
||||
static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
|
||||
uint16* dst_uv, int pix) {
|
||||
int x;
|
||||
for (x = 0; x < pix; ++x) {
|
||||
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
|
||||
|
||||
@ -1274,30 +1274,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
);
|
||||
}
|
||||
|
||||
void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"add %1, %0 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels.
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels.
|
||||
"vrhadd.u8 q0, q1 \n" // average row 1 and 2
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(src_uv_stride), // %1
|
||||
"+r"(dst_uv), // %2
|
||||
"+r"(pix) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
|
||||
void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) {
|
||||
|
||||
@ -1260,32 +1260,6 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
|
||||
}
|
||||
#endif // HAS_UYVYTOUVROW_NEON
|
||||
|
||||
#ifdef HAS_HALFROW_NEON
|
||||
void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
const uint8* src_uvb = src_uv + src_uv_stride;
|
||||
asm volatile (
|
||||
// change the stride to row 2 pointer
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels.
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels.
|
||||
"urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2
|
||||
MEMACCESS(2)
|
||||
"st1 {v0.16b}, [%2], #16 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(src_uvb), // %1
|
||||
"+r"(dst_uv), // %2
|
||||
"+r"(pix) // %3
|
||||
:
|
||||
: "cc", "memory", "v0", "v1" // Clobber List
|
||||
);
|
||||
}
|
||||
#endif // HAS_HALFROW_NEON
|
||||
|
||||
// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
|
||||
#ifdef HAS_ARGBTOBAYERROW_NEON
|
||||
void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
|
||||
|
||||
@ -3182,14 +3182,14 @@ void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
"sub %0,%1 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm2 \n"
|
||||
"movdqa %%xmm0," MEMACCESS(2) " \n"
|
||||
"movdqa %%xmm2," MEMACCESS2(0x10,2) " \n"
|
||||
"movdqu %%xmm0," MEMACCESS(2) " \n"
|
||||
"movdqu %%xmm2," MEMACCESS2(0x10,2) " \n"
|
||||
"lea " MEMLEA(0x20,2) ",%2 \n"
|
||||
"sub $0x10,%3 \n"
|
||||
"jg 1b \n"
|
||||
@ -3246,11 +3246,11 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
|
||||
asm volatile (
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"movdqa %%xmm0," MEMACCESS(1) " \n"
|
||||
"movdqa %%xmm1," MEMACCESS2(0x10,1) " \n"
|
||||
"movdqu %%xmm0," MEMACCESS(1) " \n"
|
||||
"movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
@ -3331,19 +3331,19 @@ void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
|
||||
"psrld $0x8,%%xmm1 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm2 \n"
|
||||
"movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm2 \n"
|
||||
"movdqu " MEMACCESS2(0x10,0) ",%%xmm3 \n"
|
||||
"lea " MEMLEA(0x20,0) ",%0 \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm4 \n"
|
||||
"movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm4 \n"
|
||||
"movdqu " MEMACCESS2(0x10,1) ",%%xmm5 \n"
|
||||
"pand %%xmm0,%%xmm2 \n"
|
||||
"pand %%xmm0,%%xmm3 \n"
|
||||
"pand %%xmm1,%%xmm4 \n"
|
||||
"pand %%xmm1,%%xmm5 \n"
|
||||
"por %%xmm4,%%xmm2 \n"
|
||||
"por %%xmm5,%%xmm3 \n"
|
||||
"movdqa %%xmm2," MEMACCESS(1) " \n"
|
||||
"movdqa %%xmm3," MEMACCESS2(0x10,1) " \n"
|
||||
"movdqu %%xmm2," MEMACCESS(1) " \n"
|
||||
"movdqu %%xmm3," MEMACCESS2(0x10,1) " \n"
|
||||
"lea " MEMLEA(0x20,1) ",%1 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"jg 1b \n"
|
||||
@ -5377,8 +5377,8 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// General purpose row blend.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm2)
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm2)
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"punpcklbw %%xmm2,%%xmm0 \n"
|
||||
"punpckhbw %%xmm2,%%xmm1 \n"
|
||||
@ -5389,7 +5389,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 1b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5397,13 +5397,13 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 25 / 75.
|
||||
LABELALIGN
|
||||
"25: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm1)
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm1)
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 25b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5411,12 +5411,12 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 50 / 50.
|
||||
LABELALIGN
|
||||
"50: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm1)
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm1)
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 50b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5424,13 +5424,13 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 75 / 25.
|
||||
LABELALIGN
|
||||
"75: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm0)
|
||||
"movdqu " MEMACCESS(1) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm0)
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 75b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5438,9 +5438,9 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 100 / 0 - Copy row unchanged.
|
||||
LABELALIGN
|
||||
"100: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 100b \n"
|
||||
|
||||
@ -5490,8 +5490,8 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// General purpose row blend.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklbw %%xmm4,%%xmm2 \n"
|
||||
@ -5509,7 +5509,7 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
"packuswb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 1b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5517,13 +5517,13 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 25 / 75.
|
||||
LABELALIGN
|
||||
"25: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 25b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5531,12 +5531,12 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 50 / 50.
|
||||
LABELALIGN
|
||||
"50: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 50b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5544,13 +5544,13 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 75 / 25.
|
||||
LABELALIGN
|
||||
"75: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0
|
||||
"movdqu " MEMACCESS(1) ",%%xmm1 \n"
|
||||
MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"pavgb %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
BUNDLEALIGN
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 75b \n"
|
||||
"jmp 99f \n"
|
||||
@ -5558,9 +5558,9 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
// Blend 100 / 0 - Copy row unchanged.
|
||||
LABELALIGN
|
||||
"100: \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1)
|
||||
MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
||||
"lea " MEMLEA(0x10,1) ",%1 \n"
|
||||
"jg 100b \n"
|
||||
|
||||
@ -5813,31 +5813,6 @@ void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
#endif // HAS_INTERPOLATEROW_SSE2
|
||||
|
||||
#ifdef HAS_HALFROW_SSE2
|
||||
void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
asm volatile (
|
||||
"sub %0,%1 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm0 \n"
|
||||
MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3),%%xmm0
|
||||
"sub $0x10,%2 \n"
|
||||
MEMOPMEM(movdqa,xmm0,0x00,0,1,1) // movdqa %%xmm0,(%0,%1)
|
||||
"lea " MEMLEA(0x10,0) ",%0 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src_uv), // %0
|
||||
"+r"(dst_uv), // %1
|
||||
"+r"(pix) // %2
|
||||
: "r"((intptr_t)(src_uv_stride)) // %3
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0"
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#endif // HAS_HALFROW_SSE2
|
||||
|
||||
#ifdef HAS_ARGBTOBAYERROW_SSSE3
|
||||
void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) {
|
||||
|
||||
@ -3674,11 +3674,11 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
|
||||
|
||||
align 4
|
||||
convertloop:
|
||||
movdqa xmm0, [eax]
|
||||
movdqa xmm1, [eax + 16]
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + 16]
|
||||
lea eax, [eax + 32]
|
||||
movdqa [edx], xmm0
|
||||
movdqa [edx + 16], xmm1
|
||||
movdqu [edx], xmm0
|
||||
movdqu [edx + 16], xmm1
|
||||
lea edx, [edx + 32]
|
||||
sub ecx, 32
|
||||
jg convertloop
|
||||
@ -6540,58 +6540,6 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
||||
}
|
||||
#endif // HAS_INTERPOLATEROW_SSE2
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
__asm {
|
||||
push edi
|
||||
mov eax, [esp + 4 + 4] // src_uv
|
||||
mov edx, [esp + 4 + 8] // src_uv_stride
|
||||
mov edi, [esp + 4 + 12] // dst_v
|
||||
mov ecx, [esp + 4 + 16] // pix
|
||||
sub edi, eax
|
||||
|
||||
align 4
|
||||
convertloop:
|
||||
movdqa xmm0, [eax]
|
||||
pavgb xmm0, [eax + edx]
|
||||
sub ecx, 16
|
||||
movdqa [eax + edi], xmm0
|
||||
lea eax, [eax + 16]
|
||||
jg convertloop
|
||||
pop edi
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAS_HALFROW_AVX2
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
|
||||
uint8* dst_uv, int pix) {
|
||||
__asm {
|
||||
push edi
|
||||
mov eax, [esp + 4 + 4] // src_uv
|
||||
mov edx, [esp + 4 + 8] // src_uv_stride
|
||||
mov edi, [esp + 4 + 12] // dst_v
|
||||
mov ecx, [esp + 4 + 16] // pix
|
||||
sub edi, eax
|
||||
|
||||
align 4
|
||||
convertloop:
|
||||
vmovdqu ymm0, [eax]
|
||||
vpavgb ymm0, ymm0, [eax + edx]
|
||||
sub ecx, 32
|
||||
vmovdqu [eax + edi], ymm0
|
||||
lea eax, [eax + 32]
|
||||
jg convertloop
|
||||
|
||||
pop edi
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // HAS_HALFROW_AVX2
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user