Mirror of https://chromium.googlesource.com/libyuv/libyuv (synced 2026-01-01 03:12:16 +08:00)
lsl by 2 requires a number sign (#) before the immediate for Xcode's assembler on iOS 64-bit builds; add the # sign for iOS compatibility. Also remove the legacy x86 Yasm assembly files, which are unused; leaving them in the tree complicates build systems that compile every source file.
BUG=libyuv:423
TESTED=try bots
R=noahric@google.com

Review URL: https://webrtc-codereview.appspot.com/45119004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1369 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
parent 32ad6e0e12
commit c9986313ac
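The assembler incompatibility is easy to reproduce in isolation. A minimal arm64 sketch (the function, constraints, and name are assumptions, not from the patch): GNU as accepts a bare shift amount in an extended-register operand, but Clang's integrated assembler on iOS arm64 rejects it and requires the # marker on the immediate.

#include <stdint.h>

const uint8_t* AddScaledIndex(const uint8_t* base, int64_t i) {
  const uint8_t* out;
  // Bare "lsl 2" assembles under GNU as but errors out under Xcode's
  // integrated assembler; the "#2" spelling is accepted by both.
  asm("add %0, %1, %2, lsl #2  \n"
      : "=r"(out)
      : "r"(base), "r"(i));
  return out;  // base + i * 4, i.e. an index scaled to 4-byte pixels
}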
README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1368
+Version: 1369
 License: BSD
 License File: LICENSE
 
@ -249,21 +249,6 @@ extern "C" {
|
||||
#define HAS_ARGBUNATTENUATEROW_AVX2
|
||||
#endif
|
||||
|
||||
// The following are Yasm x86 only:
|
||||
// TODO(fbarchard): Port AVX2 to inline.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
|
||||
(defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__x86_64__) || defined(__i386__))
|
||||
#define HAS_MERGEUVROW_AVX2
|
||||
#define HAS_MERGEUVROW_MMX
|
||||
#define HAS_SPLITUVROW_AVX2
|
||||
#define HAS_SPLITUVROW_MMX
|
||||
#define HAS_UYVYTOYROW_AVX2
|
||||
#define HAS_UYVYTOYROW_MMX
|
||||
#define HAS_YUY2TOYROW_AVX2
|
||||
#define HAS_YUY2TOYROW_MMX
|
||||
#endif
|
||||
|
||||
// The following are disabled when SSSE3 is available:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
|
||||
|
||||
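For context, these HAS_* macros are how row kernels get selected: callers compile in the SIMD prototype when the macro is defined and still check a CPU flag at run time, falling back to the portable C row otherwise. A minimal sketch of that pattern, assuming libyuv's real TestCpuFlag/kCpuHasAVX2 from cpu_id.h (the SplitPlaneSketch wrapper and prototypes here are illustrative):

#include <stdint.h>
#include "libyuv/cpu_id.h"  // TestCpuFlag, kCpuHasAVX2

typedef uint8_t uint8;
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUVROW_AVX2)
void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int pix);
#endif

void SplitPlaneSketch(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int pix) {
  void (*SplitUVRow)(const uint8*, uint8*, uint8*, int) = SplitUVRow_C;
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {  // compile-time gate plus run-time check
    SplitUVRow = SplitUVRow_AVX2;
  }
#endif
  SplitUVRow(src_uv, dst_u, dst_v, pix);
}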
include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1368
+#define LIBYUV_VERSION 1369
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_ NOLINT
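A small usage sketch (assumed downstream code; only LIBYUV_VERSION itself comes from this header): consumers can gate on the bumped constant at compile time.

#include "libyuv/version.h"

#if LIBYUV_VERSION >= 1369
// At or past this revision the legacy Yasm row kernels are gone, so a
// build system no longer needs a yasm step for libyuv.
#endif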
source/row_x86.asm
@@ -1,146 +0,0 @@
-;
-; Copyright 2012 The LibYuv Project Authors. All rights reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01020000h
-%error AVX2 is supported only by yasm 1.2.0 or later.
-%endif
-%endif
-%include "x86inc.asm"
-
-SECTION .text
-
-; cglobal numeric constants are parameters, gpr regs, mm regs
-
-; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
-
-%macro YUY2TOYROW 2-3
-cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
-%ifidn %1,YUY2
-    pcmpeqb    m2, m2, m2        ; generate mask 0x00ff00ff
-    psrlw      m2, m2, 8
-%endif
-
-    ALIGN      4
-.convertloop:
-    mov%2      m0, [src_yuy2q]
-    mov%2      m1, [src_yuy2q + mmsize]
-    lea        src_yuy2q, [src_yuy2q + mmsize * 2]
-%ifidn %1,YUY2
-    pand       m0, m0, m2        ; YUY2 even bytes are Y
-    pand       m1, m1, m2
-%else
-    psrlw      m0, m0, 8         ; UYVY odd bytes are Y
-    psrlw      m1, m1, 8
-%endif
-    packuswb   m0, m0, m1
-%if cpuflag(AVX2)
-    vpermq     m0, m0, 0xd8
-%endif
-    sub        pixd, mmsize
-    mov%2      [dst_yq], m0
-    lea        dst_yq, [dst_yq + mmsize]
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
-INIT_MMX MMX
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_XMM SSE2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW YUY2,u,_Unaligned
-YUY2TOYROW UYVY,a,
-YUY2TOYROW UYVY,u,_Unaligned
-INIT_YMM AVX2
-YUY2TOYROW YUY2,a,
-YUY2TOYROW UYVY,a,
-
-; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
-
-%macro SplitUVRow 1-2
-cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
-    pcmpeqb    m4, m4, m4        ; generate mask 0x00ff00ff
-    psrlw      m4, m4, 8
-    sub        dst_vq, dst_uq
-
-    ALIGN      4
-.convertloop:
-    mov%1      m0, [src_uvq]
-    mov%1      m1, [src_uvq + mmsize]
-    lea        src_uvq, [src_uvq + mmsize * 2]
-    psrlw      m2, m0, 8         ; odd bytes
-    psrlw      m3, m1, 8
-    pand       m0, m0, m4        ; even bytes
-    pand       m1, m1, m4
-    packuswb   m0, m0, m1
-    packuswb   m2, m2, m3
-%if cpuflag(AVX2)
-    vpermq     m0, m0, 0xd8
-    vpermq     m2, m2, 0xd8
-%endif
-    mov%1      [dst_uq], m0
-    mov%1      [dst_uq + dst_vq], m2
-    lea        dst_uq, [dst_uq + mmsize]
-    sub        pixd, mmsize
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-INIT_MMX MMX
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_XMM SSE2
-SplitUVRow a,
-SplitUVRow u,_Unaligned
-INIT_YMM AVX2
-SplitUVRow a,
-
-; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-;                      int width);
-
-%macro MergeUVRow_ 1-2
-cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
-    sub        src_vq, src_uq
-
-    ALIGN      4
-.convertloop:
-    mov%1      m0, [src_uq]
-    mov%1      m1, [src_uq + src_vq]
-    lea        src_uq, [src_uq + mmsize]
-    punpcklbw  m2, m0, m1        ; first 8 UV pairs
-    punpckhbw  m0, m0, m1        ; next 8 UV pairs
-%if cpuflag(AVX2)
-    vperm2i128 m1, m2, m0, 0x20  ; low 128 of ymm2 and low 128 of ymm0
-    vperm2i128 m2, m2, m0, 0x31  ; high 128 of ymm2 and high 128 of ymm0
-    mov%1      [dst_uvq], m1
-    mov%1      [dst_uvq + mmsize], m2
-%else
-    mov%1      [dst_uvq], m2
-    mov%1      [dst_uvq + mmsize], m0
-%endif
-    lea        dst_uvq, [dst_uvq + mmsize * 2]
-    sub        pixd, mmsize
-    jg         .convertloop
-    REP_RET
-%endmacro
-
-INIT_MMX MMX
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_XMM SSE2
-MergeUVRow_ a,
-MergeUVRow_ u,_Unaligned
-INIT_YMM AVX2
-MergeUVRow_ a,
-
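For reference, the three kernels the deleted file provided all have portable C fallbacks in libyuv (YUY2ToYRow_C, SplitUVRow_C, and MergeUVRow_C in source/row_common.cc), so no functionality is lost. Hedged scalar sketches of what each one does, with illustrative names:

#include <stdint.h>
typedef uint8_t uint8;

// YUY2 packs pixels as Y0 U Y1 V, so Y is the even bytes; UYVY packs
// U Y0 V Y1, so there Y is the odd bytes (src_uyvy[i * 2 + 1]).
void YUY2ToYRowScalar(const uint8* src_yuy2, uint8* dst_y, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_y[i] = src_yuy2[i * 2];
  }
}

// Deinterleave packed UV (NV12-style) into separate U and V planes.
void SplitUVRowScalar(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[i * 2];
    dst_v[i] = src_uv[i * 2 + 1];
  }
}

// Interleave separate U and V planes back into packed UV.
void MergeUVRowScalar(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                      int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_uv[i * 2] = src_u[i];
    dst_uv[i * 2 + 1] = src_v[i];
  }
}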
source/scale_neon.cc
@@ -587,7 +587,7 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
                           int dst_width, int x, int dx) {
   int dx_offset[4] = {0, 1, 2, 3};
-  int *tmp = dx_offset;
+  int* tmp = dx_offset;
   const uint8* src_tmp = src_ptr;
   asm volatile (
     ".p2align 2  \n"
@@ -971,7 +971,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
 void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
                               int dst_width, int x, int dx) {
   int dx_offset[4] = {0, 1, 2, 3};
-  int *tmp = dx_offset;
+  int* tmp = dx_offset;
   const uint8* src_tmp = src_argb;
   asm volatile (
     ".p2align 2  \n"
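Both hunks touch the same pattern: a 16.16 fixed-point column walk that the NEON code evaluates four lanes at a time (the dx_offset = {0, 1, 2, 3} vector gives each lane its own x). A hedged scalar sketch of that interpolation, with an assumed name and the blend written the way libyuv's ScaleFilterCols_C fallback does it:

#include <stdint.h>

void ScaleFilterColsScalar(uint8_t* dst_ptr, const uint8_t* src_ptr,
                           int dst_width, int x, int dx) {
  for (int j = 0; j < dst_width; ++j) {
    int xi = x >> 16;    // integer source column
    int f = x & 0xffff;  // 16-bit blend fraction toward the next column
    dst_ptr[j] = (uint8_t)((src_ptr[xi] * (65536 - f) +
                            src_ptr[xi + 1] * f) >> 16);
    x += dx;  // step the fixed-point position
  }
}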
source/scale_neon64.cc
@@ -590,7 +590,7 @@ void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
                           int dst_width, int x, int dx) {
   int dx_offset[4] = {0, 1, 2, 3};
-  int *tmp = dx_offset;
+  int* tmp = dx_offset;
   const uint8* src_tmp = src_ptr;
   asm volatile (
     "dup v0.4s, %w3  \n"  // x
@@ -862,7 +862,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
 
 // Reads 4 pixels at a time.
 // Alignment requirement: src_argb 4 byte aligned.
-// TODO, might be worth another optimization pass in future.
+// TODO(Yang Zhang): Might be worth another optimization pass in future.
 // It could be upgraded to 8 pixels at a time to start with.
 void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
                                   int src_stepx,
@@ -917,7 +917,7 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
 // the x/dx stepping
 #define LOAD1_DATA32_LANE(vn, n)       \
   "lsr %5, %3, #16  \n"                \
-  "add %6, %1, %5, lsl 2  \n"          \
+  "add %6, %1, %5, lsl #2  \n"         \
   "add %3, %3, %4  \n"                 \
   MEMACCESS(6)                         \
   "ld1 {"#vn".s}["#n"], [%6]  \n"
@@ -967,7 +967,7 @@ void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb,
 void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb,
                               int dst_width, int x, int dx) {
   int dx_offset[4] = {0, 1, 2, 3};
-  int *tmp = dx_offset;
+  int* tmp = dx_offset;
   const uint8* src_tmp = src_argb;
   asm volatile (
     "dup v0.4s, %w3  \n"  // x
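The lsl fix above is the whole iOS story: LOAD1_DATA32_LANE turns the 16.16 fixed-point position into a byte address, and the shift immediate needs its # marker for Xcode's assembler. A hedged C restatement of what the macro computes (names here are assumptions):

#include <stdint.h>

// "lsr %5, %3, #16" extracts the integer column; "lsl #2" scales it by the
// 4-byte ARGB pixel size; "add %3, %3, %4" steps x by dx for the next lane.
static const uint8_t* Load1Data32LaneAddr(const uint8_t* src_argb, int* x,
                                          int dx) {
  const uint8_t* p = src_argb + ((int64_t)(*x >> 16) << 2);
  *x += dx;
  return p;  // the macro then ld1-loads one 32-bit pixel from p into a lane
}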
source/x86inc.asm (1136 lines): diff suppressed because it is too large.