mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
ARGBExtractAlpha 16 pixels at a time for ARM
arm64 8 TestARGBExtractAlpha (10019 ms) <- original 64 bit code
arm64 8 x2 TestARGBExtractAlpha (7639 ms)
arm64 16 TestARGBExtractAlpha (7369 ms) <- new 64 bit code
thumb32 8 TestARGBExtractAlpha (9505 ms) <- original 32 bit code
thumb32 8 x2 TestARGBExtractAlpha (7400 ms)
thumb32 8 x2i TestARGBExtractAlpha (7266 ms) <- new 32 bit code
arm32 8 TestARGBExtractAlpha (10002 ms)

BUG=libyuv:572
TESTED=local test on nexus 9
R=harryjin@google.com, wangcheng@google.com

Review URL: https://codereview.chromium.org/2035573002 .
This commit is contained in:
parent
462be27ec8
commit
6546096269
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1593
|
Version: 1594
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1593
|
#define LIBYUV_VERSION 1594
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
||||||
|
|||||||
@ -61,6 +61,7 @@
|
|||||||
'-mfpu=vfp',
|
'-mfpu=vfp',
|
||||||
'-mfpu=vfpv3',
|
'-mfpu=vfpv3',
|
||||||
'-mfpu=vfpv3-d16',
|
'-mfpu=vfpv3-d16',
|
||||||
|
# '-mthumb', # arm32 not thumb
|
||||||
],
|
],
|
||||||
'conditions': [
|
'conditions': [
|
||||||
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
|
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
|
||||||
@ -74,6 +75,7 @@
|
|||||||
['target_arch != "arm64"', {
|
['target_arch != "arm64"', {
|
||||||
'cflags': [
|
'cflags': [
|
||||||
'-mfpu=neon',
|
'-mfpu=neon',
|
||||||
|
# '-marm', # arm32 not thumb
|
||||||
],
|
],
|
||||||
}],
|
}],
|
||||||
],
|
],
|
||||||
|
|||||||
@ -2404,7 +2404,7 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
|
|||||||
#endif
|
#endif
|
||||||
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
|
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_NEON
|
ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
|
||||||
: ARGBExtractAlphaRow_Any_NEON;
|
: ARGBExtractAlphaRow_Any_NEON;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -470,7 +470,7 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
|
|||||||
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
|
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
|
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
|
||||||
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 7)
|
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
|
||||||
#endif
|
#endif
|
||||||
#undef ANY11
|
#undef ANY11
|
||||||
|
|
||||||
|
|||||||
@ -1302,16 +1302,17 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
|
|||||||
asm volatile (
|
asm volatile (
|
||||||
"1: \n"
|
"1: \n"
|
||||||
MEMACCESS(0)
|
MEMACCESS(0)
|
||||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
|
||||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels
|
||||||
|
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||||
MEMACCESS(1)
|
MEMACCESS(1)
|
||||||
"vst1.8 {d3}, [%1]! \n" // store 8 A's.
|
"vst1.8 {q3}, [%1]! \n" // store 16 A's.
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_a), // %1
|
"+r"(dst_a), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
:
|
:
|
||||||
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
|
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -450,7 +450,6 @@ void I422ToARGB4444Row_NEON(const uint8* src_y,
|
|||||||
void I400ToARGBRow_NEON(const uint8* src_y,
|
void I400ToARGBRow_NEON(const uint8* src_y,
|
||||||
uint8* dst_argb,
|
uint8* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
int64 width64 = (int64)(width);
|
|
||||||
asm volatile (
|
asm volatile (
|
||||||
YUVTORGB_SETUP
|
YUVTORGB_SETUP
|
||||||
"movi v23.8b, #255 \n"
|
"movi v23.8b, #255 \n"
|
||||||
@ -463,7 +462,7 @@ void I400ToARGBRow_NEON(const uint8* src_y,
|
|||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_y), // %0
|
: "+r"(src_y), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width64) // %2
|
"+r"(width) // %2
|
||||||
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
|
: [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
|
||||||
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
|
[kUVToG]"r"(&kYuvI601Constants.kUVToG),
|
||||||
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
|
[kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
|
||||||
@ -1404,10 +1403,10 @@ void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) {
|
|||||||
asm volatile (
|
asm volatile (
|
||||||
"1: \n"
|
"1: \n"
|
||||||
MEMACCESS(0)
|
MEMACCESS(0)
|
||||||
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load row 8 pixels
|
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels
|
||||||
"subs %w2, %w2, #8 \n" // 8 processed per loop
|
"subs %w2, %w2, #16 \n" // 16 processed per loop
|
||||||
MEMACCESS(1)
|
MEMACCESS(1)
|
||||||
"st1 {v3.8b}, [%1], #8 \n" // store 8 A's.
|
"st1 {v3.16b}, [%1], #16 \n" // store 16 A's.
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_a), // %1
|
"+r"(dst_a), // %1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user