Add a VEXTOPMEM inline-asm helper macro and use it for the vextractf128
stores in the AVX2 YUY2/UYVY chroma row functions.

* include/libyuv/row.h: define VEXTOPMEM(opcode, reg, offset, base, index,
  scale) in both branches of the __native_client__ && __x86_64__
  conditional. Each variant emits "opcode $0x0,%reg,<memory operand>"; the
  immediate is fixed at $0x0.
* source/row_any.cc: move the YUY2ToUVRow_Any_AVX2 and UYVYToUVRow_Any_AVX2
  wrappers out of the HAS_ARGBTOUVROW_AVX2 guard into their own
  HAS_YUY2TOUVROW_AVX2 guard.
* source/row_posix.cc: replace MEMOPMEM with VEXTOPMEM in YUY2ToUVRow_AVX2,
  YUY2ToUV422Row_AVX2, UYVYToUVRow_AVX2 and UYVYToUV422Row_AVX2.

NOTE(review): MEMOPMEM is defined outside this patch; presumably it does not
emit the immediate operand that vextractf128 needs. Confirm against row.h.
NOTE(review): whitespace inside context lines may not match the target files
exactly; check before applying.

diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 9c60fb08b..ada916cef 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -536,6 +536,11 @@ typedef uint8 ulvec8[32];
     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
     #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
     BUNDLEUNLOCK
+#define VEXTOPMEM(opcode, reg, offset, base, index, scale) \
+    BUNDLELOCK \
+    "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
+    #opcode " $0x0,%%" #reg ",(%%r15,%%r14)\n" \
+    BUNDLEUNLOCK
 #else // defined(__native_client__) && defined(__x86_64__)
 #define BUNDLEALIGN "\n"
 #define MEMACCESS(base) "(%" #base ")"
@@ -556,6 +561,8 @@ typedef uint8 ulvec8[32];
 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
     #reg2 "\n"
+#define VEXTOPMEM(opcode, reg, offset, base, index, scale) \
+    #opcode " $0x0,%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
 #endif // defined(__native_client__) && defined(__x86_64__)
 
 #if defined(__arm__) || defined(__aarch64__)
diff --git a/source/row_any.cc b/source/row_any.cc
index 25c271964..c0383c8fc 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -358,8 +358,6 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
 
 #ifdef HAS_ARGBTOUVROW_AVX2
 UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
-UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
-UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
 #endif
 #ifdef HAS_ARGBTOUVROW_SSSE3
 UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, ARGBToUVRow_C, 4, 15)
@@ -368,6 +366,10 @@ UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, BGRAToUVRow_C, 4, 15)
 UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, ABGRToUVRow_C, 4, 15)
 UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, RGBAToUVRow_C, 4, 15)
 #endif
+#ifdef HAS_YUY2TOUVROW_AVX2
+UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
+UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
+#endif
 #ifdef HAS_YUY2TOUVROW_SSE2
 UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, YUY2ToUVRow_C, 2, 15)
 UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, UYVYToUVRow_C, 2, 15)
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 4cca7917e..ecd828f65 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -2907,7 +2907,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
     "vpermq $0xd8,%%ymm1,%%ymm1 \n"
     "vpermq $0xd8,%%ymm0,%%ymm0 \n"
     "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
-    MEMOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+    VEXTOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
     "lea 0x10" MEMACCESS(1) ",%1 \n"
     "sub $0x20,%3 \n"
     "jg 1b \n"
@@ -2949,7 +2949,7 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
     "vpermq $0xd8,%%ymm1,%%ymm1 \n"
     "vpermq $0xd8,%%ymm0,%%ymm0 \n"
     "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
-    MEMOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+    VEXTOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
     "lea 0x10" MEMACCESS(1) ",%1 \n"
     "sub $0x20,%3 \n"
     "jg 1b \n"
@@ -3020,7 +3020,7 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
     "vpermq $0xd8,%%ymm1,%%ymm1 \n"
     "vpermq $0xd8,%%ymm0,%%ymm0 \n"
     "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
-    MEMOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+    VEXTOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
     "lea 0x10" MEMACCESS(1) ",%1 \n"
     "sub $0x20,%3 \n"
     "jg 1b \n"
@@ -3062,7 +3062,7 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
     "vpermq $0xd8,%%ymm1,%%ymm1 \n"
     "vpermq $0xd8,%%ymm0,%%ymm0 \n"
     "vextractf128 $0x0,%%ymm1," MEMACCESS(1) " \n"
-    MEMOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
+    VEXTOPMEM(vextractf128,ymm0,0x00,1,2,1) // vextractf128 $0x0,%%ymm0,(%1,%2,1)
     "lea 0x10" MEMACCESS(1) ",%1 \n"
     "sub $0x20,%3 \n"
     "jg 1b \n"