From 7472021e53f9306b67ea1ba83fb5b232486537a3 Mon Sep 17 00:00:00 2001 From: "arphaxad.cy@gmail.com" Date: Mon, 31 Oct 2011 20:54:44 +0000 Subject: [PATCH] git-svn-id: http://libyuv.googlecode.com/svn/trunk@50 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- source/rotate.cc | 130 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) diff --git a/source/rotate.cc b/source/rotate.cc index 14a5252f3..a1b05e898 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -367,7 +367,135 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride, ); } -// TODO(fbarchard): Port to 32 bit +#if defined (__i386__) /* NOTE(review): no #endif matching this #if is visible in this hunk, the x86_64 branch that follows opens with #if rather than #elif, and HAS_TRANSPOSE_UVWX8_SSE2 is defined only in that branch - confirm the conditionals balance in the full file. */ +extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, + uint8* dst_a, int dst_stride_a, + uint8* dst_b, int dst_stride_b, + int w); /* 32-bit x86 routine whose body is the file-scope asm block that follows. Each loop pass loads 8 source rows of 16 bytes, byte-transposes them, and stores 8 rows of 8 bytes to dst_a (even source byte columns) and 8 rows of 8 bytes to dst_b (odd source byte columns). Every load uses movdqa, so each source row address must be 16-byte aligned. w drops by 8 per pass and the loop is bottom-tested, so one pass always runs. Presumably src holds interleaved U/V samples - confirm against callers. */ + asm( + ".text\n" +#if defined(OSX) + ".globl _TransposeUVWx8_SSE2\n" +"_TransposeUVWx8_SSE2:\n" +#else + ".global TransposeUVWx8_SSE2\n" +"TransposeUVWx8_SSE2:\n" +#endif + "push %ebx\n" + "push %esi\n" + "push %edi\n" + "push %ebp\n" /* four registers saved here and popped before ret; with the return address that puts the first stack argument at 0x14(%esp) */ + "mov 0x14(%esp),%eax\n" + "mov 0x18(%esp),%edi\n" + "mov 0x1c(%esp),%edx\n" + "mov 0x20(%esp),%esi\n" + "mov 0x24(%esp),%ebx\n" + "mov 0x28(%esp),%ebp\n" /* in declaration order: eax=src, edi=src_stride, edx=dst_a, esi=dst_stride_a, ebx=dst_b, ebp=dst_stride_b */ + "mov %esp,%ecx\n" + "sub $0x14,%esp\n" + "and $0xfffffff0,%esp\n" /* carve out a 16-byte-aligned scratch slot at (%esp) for the movdqa spills below */ + "mov %ecx,0x10(%esp)\n" /* keep the pre-alignment esp just above the scratch slot */ + "mov 0x2c(%ecx),%ecx\n" /* ecx = w, the seventh stack argument */ + +"1:" /* loop top: one pass handles 8 rows by 16 source bytes */ + "movdqa (%eax),%xmm0\n" + "movdqa (%eax,%edi,1),%xmm1\n" + "lea (%eax,%edi,2),%eax\n" /* step src down two rows */ + "movdqa %xmm0,%xmm7\n" + "punpcklbw %xmm1,%xmm0\n" + "punpckhbw %xmm1,%xmm7\n" + "movdqa %xmm7,%xmm1\n" /* xmm0 and xmm1 now hold the low and high byte-interleave of rows 0 and 1; xmm7 was only a temporary */ + "movdqa (%eax),%xmm2\n" + "movdqa (%eax,%edi,1),%xmm3\n" + "lea (%eax,%edi,2),%eax\n" + "movdqa %xmm2,%xmm7\n" + "punpcklbw %xmm3,%xmm2\n" + "punpckhbw %xmm3,%xmm7\n" + "movdqa %xmm7,%xmm3\n" /* same for rows 2 and 3 in xmm2 and xmm3 */ + "movdqa (%eax),%xmm4\n" + "movdqa (%eax,%edi,1),%xmm5\n" + "lea (%eax,%edi,2),%eax\n" + "movdqa %xmm4,%xmm7\n" + "punpcklbw %xmm5,%xmm4\n" + "punpckhbw %xmm5,%xmm7\n" + "movdqa %xmm7,%xmm5\n" /* same for rows 4 and 5 in xmm4 and xmm5 */ + "movdqa (%eax),%xmm6\n" + "movdqa (%eax,%edi,1),%xmm7\n" + "lea (%eax,%edi,2),%eax\n" + 
"movdqa %xmm5,(%esp)\n" /* all eight xmm registers hold data now, so xmm5 is spilled to the scratch slot to free a temporary */ + "neg %edi\n" /* negate the stride so the lea below can step backwards */ + "movdqa %xmm6,%xmm5\n" + "punpcklbw %xmm7,%xmm6\n" + "punpckhbw %xmm7,%xmm5\n" + "movdqa %xmm5,%xmm7\n" /* rows 6 and 7 interleaved into xmm6 and xmm7 */ + "lea 0x10(%eax,%edi,8),%eax\n" /* rewind src by the 8 rows just read and move 16 bytes right for the next pass */ + "neg %edi\n" /* restore the positive stride */ + "movdqa %xmm0,%xmm5\n" /* 16-bit interleave stage starts here, again using xmm5 as the temporary */ + "punpcklwd %xmm2,%xmm0\n" + "punpckhwd %xmm2,%xmm5\n" + "movdqa %xmm5,%xmm2\n" + "movdqa %xmm1,%xmm5\n" + "punpcklwd %xmm3,%xmm1\n" + "punpckhwd %xmm3,%xmm5\n" + "movdqa %xmm5,%xmm3\n" + "movdqa %xmm4,%xmm5\n" + "punpcklwd %xmm6,%xmm4\n" + "punpckhwd %xmm6,%xmm5\n" + "movdqa %xmm5,%xmm6\n" + "movdqa (%esp),%xmm5\n" /* reload the value spilled earlier */ + "movdqa %xmm6,(%esp)\n" /* and spill xmm6 in its place so xmm6 can serve as the temporary */ + "movdqa %xmm5,%xmm6\n" + "punpcklwd %xmm7,%xmm5\n" + "punpckhwd %xmm7,%xmm6\n" + "movdqa %xmm6,%xmm7\n" + "movdqa %xmm0,%xmm6\n" /* 32-bit interleave stage: each result register ends up holding two 8-byte source columns */ + "punpckldq %xmm4,%xmm0\n" + "punpckhdq %xmm4,%xmm6\n" + "movdqa %xmm6,%xmm4\n" + "movdqa (%esp),%xmm6\n" /* reload the second spilled value */ + "movlpd %xmm0,(%edx)\n" /* low 8 bytes go to the current dst_a row */ + "movhpd %xmm0,(%ebx)\n" /* high 8 bytes go to the current dst_b row */ + "movlpd %xmm4,(%edx,%esi,1)\n" + "lea (%edx,%esi,2),%edx\n" /* dst_a advances two rows */ + "movhpd %xmm4,(%ebx,%ebp,1)\n" + "lea (%ebx,%ebp,2),%ebx\n" /* dst_b advances two rows */ + "movdqa %xmm2,%xmm0\n" /* xmm0 is free again and becomes the temporary for the remaining groups */ + "punpckldq %xmm6,%xmm2\n" + "movlpd %xmm2,(%edx)\n" + "movhpd %xmm2,(%ebx)\n" + "punpckhdq %xmm6,%xmm0\n" + "movlpd %xmm0,(%edx,%esi,1)\n" + "lea (%edx,%esi,2),%edx\n" + "movhpd %xmm0,(%ebx,%ebp,1)\n" + "lea (%ebx,%ebp,2),%ebx\n" + "movdqa %xmm1,%xmm0\n" + "punpckldq %xmm5,%xmm1\n" + "movlpd %xmm1,(%edx)\n" + "movhpd %xmm1,(%ebx)\n" + "punpckhdq %xmm5,%xmm0\n" + "movlpd %xmm0,(%edx,%esi,1)\n" + "lea (%edx,%esi,2),%edx\n" + "movhpd %xmm0,(%ebx,%ebp,1)\n" + "lea (%ebx,%ebp,2),%ebx\n" + "movdqa %xmm3,%xmm0\n" + "punpckldq %xmm7,%xmm3\n" + "movlpd %xmm3,(%edx)\n" + "movhpd %xmm3,(%ebx)\n" + "punpckhdq %xmm7,%xmm0\n" + "movlpd %xmm0,(%edx,%esi,1)\n" + "lea (%edx,%esi,2),%edx\n" + "movhpd %xmm0,(%ebx,%ebp,1)\n" + "lea (%ebx,%ebp,2),%ebx\n" /* both destinations have advanced 8 rows in total during this pass */ + "sub $0x8,%ecx\n" /* w decreases by 8 per pass */ + "ja 1b\n" /* repeat only while the subtraction neither borrowed nor reached zero */ + "mov 0x10(%esp),%esp\n" /* restore the pre-alignment stack pointer saved in the prologue */ + "pop %ebp\n" + "pop %edi\n" + "pop %esi\n" + "pop %ebx\n" + "ret\n" +); + #if defined (__x86_64__) #define HAS_TRANSPOSE_UVWX8_SSE2 static void TransposeUVWx8_SSE2(const uint8* src, int 
src_stride,