mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
ARGBExtractAlpha 16 pixels at a time for ARM
arm64 8 TestARGBExtractAlpha (10019 ms) <- original 64 bit code
arm64 8 x2 TestARGBExtractAlpha (7639 ms)
arm64 16 TestARGBExtractAlpha (7369 ms) <- new 64 bit code
thumb32 8 TestARGBExtractAlpha (9505 ms) <- original 32 bit code
thumb32 8 x2 TestARGBExtractAlpha (7400 ms)
thumb32 8 x2i TestARGBExtractAlpha (7266 ms) <- new 32 bit code
arm32 8 TestARGBExtractAlpha (10002 ms)

BUG=libyuv:572
TESTED=local test on nexus 9
R=harryjin@google.com, wangcheng@google.com
Review URL: https://codereview.chromium.org/2035573002 .
This commit is contained in:
parent
462be27ec8
commit
6546096269
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1593
|
||||
Version: 1594
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1593
|
||||
#define LIBYUV_VERSION 1594
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||
|
||||
@ -61,6 +61,7 @@
|
||||
'-mfpu=vfp',
|
||||
'-mfpu=vfpv3',
|
||||
'-mfpu=vfpv3-d16',
|
||||
# '-mthumb', # arm32 not thumb
|
||||
],
|
||||
'conditions': [
|
||||
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
|
||||
@ -74,6 +75,7 @@
|
||||
['target_arch != "arm64"', {
|
||||
'cflags': [
|
||||
'-mfpu=neon',
|
||||
# '-marm', # arm32 not thumb
|
||||
],
|
||||
}],
|
||||
],
|
||||
|
||||
@ -2404,8 +2404,8 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
|
||||
#endif
|
||||
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON
|
||||
: ARGBExtractAlphaRow_Any_NEON;
|
||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
|
||||
: ARGBExtractAlphaRow_Any_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -470,7 +470,7 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
|
||||
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
|
||||
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7)
|
||||
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
|
||||
#endif
|
||||
#undef ANY11
|
||||
|
||||
|
||||
@ -1302,16 +1302,17 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d3}, [%1]! \n" // store 8 A's.
|
||||
"vst1.8 {q3}, [%1]! \n" // store 16 A's.
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -450,7 +450,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
||||
void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
int64 width64 = (int64)(width);
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"movi v23.8b, #255 \n"
|
||||
@ -463,7 +462,7 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_y), // %0
|
||||
"+r"(dst_argb), // %1
|
||||
"+r"(width64) // %2
|
||||
"+r"(width) // %2
|
||||
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
|
||||
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
|
||||
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
|
||||
@ -1404,10 +1403,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels
|
||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels
|
||||
"subs %w2, %w2, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"st1 {v3.8b}, [%1], #8 \n" // store 8 A's.
|
||||
"st1 {v3.16b}, [%1], #16 \n" // store 16 A's.
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_a), // %1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user