ARGBToI444_SSSE3 UV function ported. Thanks to changjun.yang@intel.com

BUG=148 TESTED=out\release\libyuv_unittest --gtest_filter=*ARGBToI* | grep ms Review URL: https://webrtc-codereview.appspot.com/1019011 git-svn-id: http://libyuv.googlecode.com/svn/trunk@539 16f28f9a-4ce2-e073-06de-1de4eb20be90
2026-02-11 04:39:53 +08:00 · 2013-01-16 05:54:56 +00:00 · 2013-01-16 05:54:56 +00:00 · 41e972ec31
commit 41e972ec31
parent ba45bbff34
6 changed files with 1025 additions and 872 deletions
--- a/README.chromium
+++ b/README.chromium
@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 538
+Version: 539
 License: BSD
 License File: LICENSE
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@ -119,6 +119,7 @@ extern "C" {
 #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
 #define HAS_ARGBCOLORTABLEROW_X86
 #define HAS_ARGBMULTIPLYROW_SSE2
 // Enables the SSSE3 ARGB -> U/V 444 row functions; row_any.cc tests this
 // macro before instantiating ARGBToUV444Row_Any_SSSE3.
 #define HAS_ARGBTOUV444ROW_SSSE3
 #endif  // !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
 // The following are Yasm x86 only.
@ -492,6 +493,13 @@ void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
                       uint8* dst_u, uint8* dst_v, int width);
 // SSSE3 row functions that produce one U byte and one V byte for every ARGB
 // source pixel.
 //
 // Aligned variant: source and both destinations are accessed with aligned
 // 16-byte loads/stores (movdqa), so all three pointers must be 16-byte
 // aligned.
 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
                           uint8* dst_u, uint8* dst_v, int width);
 // Same conversion using unaligned 16-byte loads/stores (movdqu).
 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
                                     uint8* dst_u, uint8* dst_v, int width);
 // Any-width wrapper: the multiple-of-16 part of the row goes through the
 // unaligned SSSE3 function, the remaining pixels through ARGBToUV444Row_C.
 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
                               uint8* dst_u, uint8* dst_v, int width);
 void ARGBToUV422Row_SSSE3(const uint8* src_argb,
                          uint8* dst_u, uint8* dst_v, int width);
 void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 538
+#define LIBYUV_VERSION 539
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
--- a/source/row_any.cc
+++ b/source/row_any.cc
@ -287,6 +287,23 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
 #endif
 #undef UVANY
 // Defines NAMEANY(src_uv, dst_u, dst_v, width), a row function that accepts
 // any width by splitting the row in two:
 //   - the leading n = width & ~MASK pixels go to ANYTOUV_SIMD,
 //   - the trailing width & MASK pixels go to ANYTOUV_C.
 // BPP is the number of source bytes per pixel; the outputs advance one byte
 // per pixel (4:4:4, no chroma subsampling). MASK is the SIMD group size
 // minus one.
 //
 // The SIMD call is skipped when n is 0. SIMD row kernels such as
 // ARGBToUV444Row_Unaligned_SSSE3 test their pixel count only after a full
 // group has been processed, so calling them with n == 0 would still read and
 // write one whole group past what the caller asked for.
 #define UV444ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                  \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      int n = width & ~(MASK);                                                 \
      if (n > 0) {                                                             \
        ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                 \
      }                                                                        \
      ANYTOUV_C(src_uv + n * (BPP),                                            \
                dst_u + n,                                                     \
                dst_v + n,                                                     \
                width & (MASK));                                               \
    }
 // Any-width ARGB -> U/V 444 wrapper: 4 source bytes per pixel, groups of 16
 // pixels (MASK 15) go to the SSSE3 kernel, leftover pixels to
 // ARGBToUV444Row_C. The unaligned kernel is used (movdqu loads/stores).
 #ifdef HAS_ARGBTOUV444ROW_SSSE3
 UV444ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
          ARGBToUV444Row_C, 4, 15)
 #endif
 // The helper macro is only needed for the instantiation above.
 #undef UV444ANY
 #define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT)           \
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
--- a/source/row_win.cc
+++ b/source/row_win.cc
@ -1101,6 +1101,122 @@ __asm {
  }
 }
 // Converts one row of ARGB pixels into separate U and V rows with SSSE3,
 // emitting one U byte and one V byte for every source pixel.
 //
 //   src_argb0  source pixels, 4 bytes each; loaded with movdqa, so the
 //              pointer must be 16-byte aligned.
 //   dst_u      U output; stored with movdqa (must be 16-byte aligned).
 //   dst_v      V output; stored with movdqa (must be 16-byte aligned).
 //   width      number of pixels to convert.
 //
 // Each loop pass consumes 64 source bytes (16 pixels) and the pixel count is
 // only tested at the bottom of the loop, so at least one group of 16 pixels
 // is always processed and a trailing partial group is handled as a full one.
 // NOTE(review): this presumably relies on callers passing a positive multiple
 // of 16 -- confirm against the call sites.
 //
 // The function is naked: arguments are read directly from the stack and edi
 // is saved and restored by hand.
 __declspec(naked) __declspec(align(16))
 void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
                           uint8* dst_u, uint8* dst_v, int width) {
 __asm {
    push       edi                  // edi is used for dst_v; restored before ret
    // The extra "+ 4" in each offset skips the edi value just pushed.
    mov        eax, [esp + 4 + 4]   // src_argb
    mov        edx, [esp + 4 + 8]   // dst_u
    mov        edi, [esp + 4 + 12]  // dst_v
    mov        ecx, [esp + 4 + 16]  // pix (pixels remaining)
    // Loop-invariant constants, defined elsewhere in this file.
    movdqa     xmm7, kARGBToU       // per-byte weights for U
    movdqa     xmm6, kARGBToV       // per-byte weights for V
    movdqa     xmm5, kAddUV128      // presumably a +128 bias per byte (see name)
    sub        edi, edx             // edi = dst_v - dst_u: [edx + edi] is V
    align      16
 convertloop:
    /* convert 16 pixels to U, then the same 16 pixels to V */
    movdqa     xmm0, [eax]          // U: load 16 pixels (4 per register)
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    // Unsigned pixel byte * signed weight byte, adjacent products summed
    // into 16-bit words (two words per pixel).
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    // Add adjacent words: one 16-bit sum per pixel, 8 pixels per register.
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    // Keep the high byte of every sum, then narrow 16 words to 16 bytes.
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5           // bytewise add of kAddUV128 (wraps mod 256)
    // Flags from this sub are consumed by jg at the bottom; the SSE, mov and
    // lea instructions in between do not modify EFLAGS.
    sub        ecx,  16
    movdqa     [edx], xmm0          // store 16 U bytes
    // xmm0-xmm3 were overwritten above, so the same pixels are reloaded.
    movdqa     xmm0, [eax]          // V
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + 32]
    movdqa     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm6
    pmaddubsw  xmm1, xmm6
    pmaddubsw  xmm2, xmm6
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    lea        eax,  [eax + 64]     // next 16 source pixels (lea keeps flags)
    movdqa     [edx + edi], xmm0    // store 16 V bytes
    lea        edx,  [edx + 16]     // advance U (and, through edi, V)
    jg         convertloop          // loop while the remaining count is > 0
    pop        edi
    ret
  }
 }
 // Converts one row of ARGB pixels into separate U and V rows with SSSE3,
 // emitting one U byte and one V byte for every source pixel. Source and
 // destination rows are accessed with movdqu, so none of the three pointers
 // needs to be 16-byte aligned.
 //
 //   src_argb0  source pixels, 4 bytes each.
 //   dst_u      U output, one byte per pixel.
 //   dst_v      V output, one byte per pixel.
 //   width      number of pixels to convert.
 //
 // Each loop pass consumes 64 source bytes (16 pixels) and the pixel count is
 // only tested at the bottom of the loop, so at least one group of 16 pixels
 // is always processed and a trailing partial group is handled as a full one.
 // NOTE(review): this presumably relies on callers passing a positive multiple
 // of 16 -- confirm against the call sites.
 //
 // The function is naked: arguments are read directly from the stack and edi
 // is saved and restored by hand.
 __declspec(naked) __declspec(align(16))
 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0,
                                     uint8* dst_u, uint8* dst_v, int width) {
 __asm {
    push       edi                  // edi is used for dst_v; restored before ret
    // The extra "+ 4" in each offset skips the edi value just pushed.
    mov        eax, [esp + 4 + 4]   // src_argb
    mov        edx, [esp + 4 + 8]   // dst_u
    mov        edi, [esp + 4 + 12]  // dst_v
    mov        ecx, [esp + 4 + 16]  // pix (pixels remaining)
    // The constant tables are still loaded with movdqa, so the tables
    // themselves must be 16-byte aligned.
    movdqa     xmm7, kARGBToU       // per-byte weights for U
    movdqa     xmm6, kARGBToV       // per-byte weights for V
    movdqa     xmm5, kAddUV128      // presumably a +128 bias per byte (see name)
    sub        edi, edx             // edi = dst_v - dst_u: [edx + edi] is V
    align      16
 convertloop:
    /* convert 16 pixels to U, then the same 16 pixels to V */
    movdqu     xmm0, [eax]          // U: load 16 pixels (4 per register)
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    // Unsigned pixel byte * signed weight byte, adjacent products summed
    // into 16-bit words (two words per pixel).
    pmaddubsw  xmm0, xmm7
    pmaddubsw  xmm1, xmm7
    pmaddubsw  xmm2, xmm7
    pmaddubsw  xmm3, xmm7
    // Add adjacent words: one 16-bit sum per pixel, 8 pixels per register.
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    // Keep the high byte of every sum, then narrow 16 words to 16 bytes.
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5           // bytewise add of kAddUV128 (wraps mod 256)
    // Flags from this sub are consumed by jg at the bottom; the SSE, mov and
    // lea instructions in between do not modify EFLAGS.
    sub        ecx,  16
    movdqu     [edx], xmm0          // store 16 U bytes
    // xmm0-xmm3 were overwritten above, so the same pixels are reloaded.
    movdqu     xmm0, [eax]          // V
    movdqu     xmm1, [eax + 16]
    movdqu     xmm2, [eax + 32]
    movdqu     xmm3, [eax + 48]
    pmaddubsw  xmm0, xmm6
    pmaddubsw  xmm1, xmm6
    pmaddubsw  xmm2, xmm6
    pmaddubsw  xmm3, xmm6
    phaddw     xmm0, xmm1
    phaddw     xmm2, xmm3
    psrlw      xmm0, 8
    psrlw      xmm2, 8
    packuswb   xmm0, xmm2
    paddb      xmm0, xmm5
    lea        eax,  [eax + 64]     // next 16 source pixels (lea keeps flags)
    movdqu     [edx + edi], xmm0    // store 16 V bytes
    lea        edx,  [edx + 16]     // advance U (and, through edi, V)
    jg         convertloop          // loop while the remaining count is > 0
    pop        edi
    ret
  }
 }
 __declspec(naked) __declspec(align(16))
 void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
                          uint8* dst_u, uint8* dst_v, int width) {