mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2026-01-01 03:12:16 +08:00
YUY2 for AVX2
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/887006 git-svn-id: http://libyuv.googlecode.com/svn/trunk@424 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent
0908a701e9
commit
02e48bf72b
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 423
|
||||
Version: 424
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 423
|
||||
#define LIBYUV_VERSION 424
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
61
source/row_x86.asm
Normal file
61
source/row_x86.asm
Normal file
@ -0,0 +1,61 @@
|
||||
;*
|
||||
;* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
;*
|
||||
;* Use of this source code is governed by a BSD-style license
|
||||
;* that can be found in the LICENSE file in the root of the source
|
||||
;* tree. An additional intellectual property rights grant can be found
|
||||
;* in the file PATENTS. All contributing project authors may
|
||||
;* be found in the AUTHORS file in the root of the source tree.
|
||||
;*
|
||||
|
||||
%include "x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void YUY2ToYRow_SSE2(const uint8* src_yuy2,
;                      uint8* dst_y, int pix);
;
; YUY2TOYROW format, mov_suffix [, name_suffix]
;
; Emits one function named <format>ToYRow<name_suffix> (declared through
; cglobal) that extracts the Y bytes from a row of packed 4:2:2 pixels.
;
;   %1  source format. YUY2 keeps the even bytes (Y0 U Y1 V); any other value
;       takes the UYVY path and keeps the odd bytes (U Y0 V Y1).
;   %2  a or u, appended to "mov" to pick the aligned (mova) or unaligned
;       (movu) load/store for both the source and the destination.
;   %3  optional suffix appended to the function name (e.g. _Unaligned).
;
; Arguments of the generated function:
;   src_yuy2  packed source row, 2 bytes per pixel
;   dst_y     destination row, 1 byte per pixel
;   pix       number of pixels to convert
;
; Each iteration reads 2 * mmsize source bytes and writes mmsize Y bytes.
; The loop body always runs at least once and repeats while the remaining
; count is above zero, so the final iteration still stores a full mmsize
; bytes even when fewer pixels remain.
;
; Clobbers: m0, m1 (and m2 for YUY2), flags, and the three argument registers.

%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
    pcmpeqb    m2, m2                    ; all ones
    psrlw      m2, 8                     ; -> 0x00ff in every word: keep even bytes
%endif

    ALIGN      16
.convertloop:
    mov%2      m0, [src_yuy2q]
    mov%2      m1, [src_yuy2q + mmsize]
    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
    pand       m0, m2                    ; YUY2 even bytes are Y
    pand       m1, m2
%else
    psrlw      m0, 8                     ; UYVY odd bytes are Y
    psrlw      m1, 8
%endif
    packuswb   m0, m1                    ; words -> bytes (values already <= 0xff)
%if mmsize == 32
    ; On 256-bit registers packuswb packs each 128-bit lane on its own, so the
    ; qwords come out as m0.lo, m1.lo, m0.hi, m1.hi. Swap the two middle
    ; qwords (selector 0xd8 = 3,1,2,0) to restore linear pixel order.
    vpermq     m0, m0, 0xd8
%endif
    sub        pixd, mmsize              ; sets the flags consumed by jg below
    mov%2      [dst_yq], m0              ; SIMD store and lea leave flags intact
    lea        dst_yq, [dst_yq + mmsize]
    jg         .convertloop
    RET
%endmacro
|
||||
|
||||
; Instantiate YUY2TOYROW once per source format (YUY2, UYVY) and per
; load/store flavor: "a" selects mova with no name suffix, "u" selects movu
; with the _Unaligned name suffix. The INIT_* line ahead of each group sets
; the register width (mmsize) that the macro body is expanded with.
; TODO(fbarchard): Remove MMX when SSE2 is required.
|
||||
; 8-byte MMX registers (mmsize = 8).
INIT_MMX MMX
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW YUY2,u,_Unaligned
|
||||
YUY2TOYROW UYVY,a,
|
||||
YUY2TOYROW UYVY,u,_Unaligned
|
||||
; 16-byte XMM registers (mmsize = 16).
INIT_XMM SSE2
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW YUY2,u,_Unaligned
|
||||
YUY2TOYROW UYVY,a,
|
||||
YUY2TOYROW UYVY,u,_Unaligned
|
||||
; 32-byte YMM registers (mmsize = 32).
INIT_YMM AVX2
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW YUY2,u,_Unaligned
|
||||
YUY2TOYROW UYVY,a,
|
||||
YUY2TOYROW UYVY,u,_Unaligned
|
||||
|
||||
@ -34,7 +34,9 @@
|
||||
; as this feature might be useful for others as well. Send patches or ideas
|
||||
; to x264-devel@videolan.org .
|
||||
|
||||
%define program_name x264
|
||||
; Local changes for libyuv:
|
||||
; remove %define program_name and references in labels
|
||||
; rename cpus to uppercase
|
||||
|
||||
%define WIN64 0
|
||||
%define UNIX64 0
|
||||
@ -505,7 +507,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
%endmacro
|
||||
%macro cglobal_internal 1-2+
|
||||
%ifndef cglobaled_%1
|
||||
%xdefine %1 mangle(program_name %+ _ %+ %1)
|
||||
%xdefine %1 mangle(%1)
|
||||
%xdefine %1.skip_prologue %1 %+ .skip_prologue
|
||||
CAT_XDEFINE cglobaled_, %1, 1
|
||||
%endif
|
||||
@ -525,7 +527,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
%endmacro
|
||||
|
||||
%macro cextern 1
|
||||
%xdefine %1 mangle(program_name %+ _ %+ %1)
|
||||
%xdefine %1 mangle(%1)
|
||||
CAT_XDEFINE cglobaled_, %1, 1
|
||||
extern %1
|
||||
%endmacro
|
||||
@ -538,7 +540,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
|
||||
%endmacro
|
||||
|
||||
%macro const 2+
|
||||
%xdefine %1 mangle(program_name %+ _ %+ %1)
|
||||
%xdefine %1 mangle(%1)
|
||||
global %1
|
||||
%1: %2
|
||||
%endmacro
|
||||
@ -551,22 +553,22 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
|
||||
; cpuflags
|
||||
|
||||
%assign cpuflags_mmx (1<<0)
|
||||
%assign cpuflags_mmx2 (1<<1) | cpuflags_mmx
|
||||
%assign cpuflags_3dnow (1<<2) | cpuflags_mmx
|
||||
%assign cpuflags_MMX (1<<0)
|
||||
%assign cpuflags_MMX2 (1<<1) | cpuflags_MMX
|
||||
%assign cpuflags_3dnow (1<<2) | cpuflags_MMX
|
||||
%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow
|
||||
%assign cpuflags_sse (1<<4) | cpuflags_mmx2
|
||||
%assign cpuflags_sse2 (1<<5) | cpuflags_sse
|
||||
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
|
||||
%assign cpuflags_sse3 (1<<7) | cpuflags_sse2
|
||||
%assign cpuflags_ssse3 (1<<8) | cpuflags_sse3
|
||||
%assign cpuflags_sse4 (1<<9) | cpuflags_ssse3
|
||||
%assign cpuflags_sse42 (1<<10)| cpuflags_sse4
|
||||
%assign cpuflags_avx (1<<11)| cpuflags_sse42
|
||||
%assign cpuflags_xop (1<<12)| cpuflags_avx
|
||||
%assign cpuflags_fma4 (1<<13)| cpuflags_avx
|
||||
%assign cpuflags_avx2 (1<<14)| cpuflags_avx
|
||||
%assign cpuflags_fma3 (1<<15)| cpuflags_avx
|
||||
%assign cpuflags_SSE (1<<4) | cpuflags_MMX2
|
||||
%assign cpuflags_SSE2 (1<<5) | cpuflags_SSE
|
||||
%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2
|
||||
%assign cpuflags_SSE3 (1<<7) | cpuflags_SSE2
|
||||
%assign cpuflags_SSSE3 (1<<8) | cpuflags_SSE3
|
||||
%assign cpuflags_SSE4 (1<<9) | cpuflags_SSSE3
|
||||
%assign cpuflags_SSE42 (1<<10)| cpuflags_SSE4
|
||||
%assign cpuflags_AVX (1<<11)| cpuflags_SSE42
|
||||
%assign cpuflags_xop (1<<12)| cpuflags_AVX
|
||||
%assign cpuflags_fma4 (1<<13)| cpuflags_AVX
|
||||
%assign cpuflags_AVX2 (1<<14)| cpuflags_AVX
|
||||
%assign cpuflags_fma3 (1<<15)| cpuflags_AVX
|
||||
|
||||
%assign cpuflags_cache32 (1<<16)
|
||||
%assign cpuflags_cache64 (1<<17)
|
||||
@ -594,17 +596,17 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%assign cpuflags cpuflags | cpuflags_%2
|
||||
%endif
|
||||
%xdefine SUFFIX _ %+ cpuname
|
||||
%if cpuflag(avx)
|
||||
%assign avx_enabled 1
|
||||
%if cpuflag(AVX)
|
||||
%assign AVX_enabled 1
|
||||
%endif
|
||||
%if mmsize == 16 && notcpuflag(sse2)
|
||||
%if mmsize == 16 && notcpuflag(SSE2)
|
||||
%define mova movaps
|
||||
%define movu movups
|
||||
%define movnta movntps
|
||||
%endif
|
||||
%if cpuflag(aligned)
|
||||
%define movu mova
|
||||
%elifidn %1, sse3
|
||||
%elifidn %1, SSE3
|
||||
%define movu lddqu
|
||||
%endif
|
||||
%else
|
||||
@ -614,7 +616,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; merge mmx and sse*
|
||||
; merge MMX and SSE*
|
||||
|
||||
%macro CAT_XDEFINE 3
|
||||
%xdefine %1%2 %3
|
||||
@ -625,7 +627,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%endmacro
|
||||
|
||||
%macro INIT_MMX 0-1+
|
||||
%assign avx_enabled 0
|
||||
%assign AVX_enabled 0
|
||||
%define RESET_MM_PERMUTATION INIT_MMX %1
|
||||
%define mmsize 8
|
||||
%define num_mmregs 8
|
||||
@ -648,7 +650,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%endmacro
|
||||
|
||||
%macro INIT_XMM 0-1+
|
||||
%assign avx_enabled 0
|
||||
%assign AVX_enabled 0
|
||||
%define RESET_MM_PERMUTATION INIT_XMM %1
|
||||
%define mmsize 16
|
||||
%define num_mmregs 8
|
||||
@ -669,7 +671,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
|
||||
%endmacro
|
||||
|
||||
%macro INIT_YMM 0-1+
|
||||
%assign avx_enabled 1
|
||||
%assign AVX_enabled 1
|
||||
%define RESET_MM_PERMUTATION INIT_YMM %1
|
||||
%define mmsize 32
|
||||
%define num_mmregs 8
|
||||
@ -832,7 +834,7 @@ INIT_XMM
|
||||
%xdefine %%dst %2
|
||||
%rep %0-2
|
||||
%ifidn %%dst, %3
|
||||
%error non-avx emulation of ``%%opcode'' is not supported
|
||||
%error non-AVX emulation of ``%%opcode'' is not supported
|
||||
%endif
|
||||
%rotate 1
|
||||
%endrep
|
||||
@ -868,7 +870,7 @@ INIT_XMM
|
||||
|
||||
%if %4>=3+%3
|
||||
%ifnidn %5, %6
|
||||
%if avx_enabled && %%sizeofreg==16
|
||||
%if AVX_enabled && %%sizeofreg==16
|
||||
v%1 %5, %6, %7
|
||||
%else
|
||||
CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
|
||||
@ -891,7 +893,7 @@ INIT_XMM
|
||||
; So, if the op is symmetric and the wrong one is memory, swap them.
|
||||
%macro RUN_AVX_INSTR1 8
|
||||
%assign %%swap 0
|
||||
%if avx_enabled
|
||||
%if AVX_enabled
|
||||
%ifnid %6
|
||||
%assign %%swap 1
|
||||
%endif
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user