mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
Add MIPS SIMD Arch (MSA) optimized MirrorRow function
Following the preparation patch added to the Chromium sources (2150943003: Add MIPS SIMD Arch (MSA) build flags for GYP/GN builds), this patch adds the first MSA-optimized function to the libYUV project. BUG=libyuv:634 R=fbarchard@google.com Review URL: https://codereview.chromium.org/2285683002 .
This commit is contained in:
parent
5da918b48d
commit
c5323b0fdc
@ -53,6 +53,12 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
source/scale_neon.cc.neon
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),mips)
|
||||
LOCAL_CFLAGS += -DLIBYUV_MSA
|
||||
LOCAL_SRC_FILES += \
|
||||
source/row_msa.cc
|
||||
endif
|
||||
|
||||
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
|
||||
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
|
||||
|
||||
|
||||
15
BUILD.gn
15
BUILD.gn
@ -94,6 +94,10 @@ static_library("libyuv") {
|
||||
deps += [ ":libyuv_neon" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_msa) {
|
||||
deps += [ ":libyuv_msa" ]
|
||||
}
|
||||
|
||||
if (is_nacl) {
|
||||
# Always enable optimization under NaCl to workaround crbug.com/538243 .
|
||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||
@ -124,6 +128,17 @@ if (libyuv_use_neon) {
|
||||
}
|
||||
}
|
||||
|
||||
if (libyuv_use_msa) {
|
||||
static_library("libyuv_msa") {
|
||||
sources = [
|
||||
# MSA Source Files
|
||||
"source/row_msa.cc",
|
||||
]
|
||||
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
}
|
||||
}
|
||||
|
||||
if (libyuv_include_tests) {
|
||||
config("libyuv_unittest_warnings_config") {
|
||||
if (!is_win) {
|
||||
|
||||
@ -40,6 +40,7 @@ set(ly_source_files
|
||||
${ly_src_dir}/row_any.cc
|
||||
${ly_src_dir}/row_common.cc
|
||||
${ly_src_dir}/row_mips.cc
|
||||
${ly_src_dir}/row_msa.cc
|
||||
${ly_src_dir}/row_neon.cc
|
||||
${ly_src_dir}/row_neon64.cc
|
||||
${ly_src_dir}/row_gcc.cc
|
||||
@ -80,6 +81,7 @@ set(ly_header_files
|
||||
${ly_inc_dir}/libyuv/convert_from.h
|
||||
${ly_inc_dir}/libyuv/convert_from_argb.h
|
||||
${ly_inc_dir}/libyuv/cpu_id.h
|
||||
${ly_inc_dir}/libyuv/macros_msa.h
|
||||
${ly_inc_dir}/libyuv/planar_functions.h
|
||||
${ly_inc_dir}/libyuv/rotate.h
|
||||
${ly_inc_dir}/libyuv/rotate_argb.h
|
||||
|
||||
@ -195,6 +195,16 @@ Running test with C code:
|
||||
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
|
||||
ninja -C out/Official
|
||||
|
||||
#### Building mips with GN
|
||||
|
||||
mipsel
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
|
||||
mips64el
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
|
||||
### Linux
|
||||
|
||||
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
|
||||
|
||||
@ -42,6 +42,7 @@ static const int kCpuHasAVX3 = 0x2000;
|
||||
// These flags are only valid on MIPS processors.
|
||||
static const int kCpuHasMIPS = 0x10000;
|
||||
static const int kCpuHasDSPR2 = 0x20000;
|
||||
static const int kCpuHasMSA = 0x40000;
|
||||
|
||||
// Internal function used to auto-init.
|
||||
LIBYUV_API
|
||||
|
||||
78
include/libyuv/macros_msa.h
Normal file
78
include/libyuv/macros_msa.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef __MACROS_MSA_H__
|
||||
#define __MACROS_MSA_H__
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include <stdint.h>
|
||||
#include <msa.h>
|
||||
|
||||
/* Description : Load one value of type RTYPE from address 'psrc'
   Arguments   : Inputs  - psrc
                 Return Type - as per RTYPE
   Details     : 'psrc' is cast to (RTYPE*) and dereferenced
*/
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

/* Description : Store one value of type RTYPE to address 'pdst'
   Arguments   : Inputs  - in, pdst
   Details     : 'pdst' is cast to (RTYPE*) and 'in' is assigned through it
*/
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in)
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)

/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
                 'stride' is parenthesized so that an expression argument
                 (e.g. 'n + 1') is added as a whole
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) {  \
  out0 = LD_B(RTYPE, (psrc));                     \
  out1 = LD_B(RTYPE, (psrc) + (stride));          \
}
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)

/* Description : Load four vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : Loads 'out0'..'out3' from (psrc), (psrc + stride),
                 (psrc + 2 * stride) and (psrc + 3 * stride)
*/
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) {        \
  LD_B2(RTYPE, (psrc), (stride), out0, out1);                       \
  LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3);        \
}
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)

/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
                 'stride' is parenthesized so that an expression argument
                 is added as a whole
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) {  \
  ST_B(RTYPE, in0, (pdst));                     \
  ST_B(RTYPE, in1, (pdst) + (stride));          \
}
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)

/* Description : Store four vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : Stores 'in0'..'in3' to (pdst), (pdst + stride),
                 (pdst + 2 * stride) and (pdst + 3 * stride)
*/
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) {        \
  ST_B2(RTYPE, in0, in1, (pdst), (stride));                     \
  ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride));      \
}
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)
|
||||
|
||||
/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'
                 Every argument is parenthesized before its cast so that an
                 expression argument is converted as a whole
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) {         \
  out0 = (RTYPE) __msa_vshf_b((v16i8)(mask0), (v16i8)(in1), (v16i8)(in0));     \
  out1 = (RTYPE) __msa_vshf_b((v16i8)(mask1), (v16i8)(in3), (v16i8)(in2));     \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
|
||||
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||
#endif /* __MACROS_MSA_H__ */
|
||||
@ -372,6 +372,10 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#define HAS_MIRRORROW_MSA
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
#if defined(VISUALC_HAS_AVX2)
|
||||
#define SIMD_ALIGNED(var) __declspec(align(32)) var
|
||||
@ -809,11 +813,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_C(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
|
||||
void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
|
||||
|
||||
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width);
|
||||
|
||||
@ -8,10 +8,13 @@
|
||||
|
||||
import("//build_overrides/build.gni")
|
||||
import("//build/config/arm.gni")
|
||||
import("//build/config/mips.gni")
|
||||
|
||||
declare_args() {
|
||||
libyuv_include_tests = !build_with_chromium
|
||||
libyuv_disable_jpeg = false
|
||||
libyuv_use_neon = (current_cpu == "arm64" ||
|
||||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
|
||||
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
|
||||
mips_use_msa
|
||||
}
|
||||
|
||||
11
libyuv.gyp
11
libyuv.gyp
@ -26,12 +26,18 @@
|
||||
# Link-Time Optimizations.
|
||||
'use_lto%': 0,
|
||||
'build_neon': 0,
|
||||
'build_msa': 0,
|
||||
'conditions': [
|
||||
['(target_arch == "armv7" or target_arch == "armv7s" or \
|
||||
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
|
||||
and (arm_neon == 1 or arm_neon_optional == 1)', {
|
||||
'build_neon': 1,
|
||||
}],
|
||||
['(target_arch == "mipsel" or target_arch == "mips64el")\
|
||||
and (mips_msa == 1)',
|
||||
{
|
||||
'build_msa': 1,
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
@ -79,6 +85,11 @@
|
||||
}],
|
||||
],
|
||||
}],
|
||||
['build_msa != 0', {
|
||||
'defines': [
|
||||
'LIBYUV_MSA',
|
||||
],
|
||||
}],
|
||||
['OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG'
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
'include/libyuv/convert_from.h',
|
||||
'include/libyuv/convert_from_argb.h',
|
||||
'include/libyuv/cpu_id.h',
|
||||
'include/libyuv/macros_msa.h',
|
||||
'include/libyuv/mjpeg_decoder.h',
|
||||
'include/libyuv/planar_functions.h',
|
||||
'include/libyuv/rotate.h',
|
||||
@ -61,6 +62,7 @@
|
||||
'source/row_common.cc',
|
||||
'source/row_gcc.cc',
|
||||
'source/row_mips.cc',
|
||||
'source/row_msa.cc',
|
||||
'source/row_neon.cc',
|
||||
'source/row_neon64.cc',
|
||||
'source/row_win.cc',
|
||||
|
||||
@ -86,6 +86,12 @@
|
||||
'LIBYUV_NEON'
|
||||
],
|
||||
}],
|
||||
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
|
||||
and (mips_msa == 1)', {
|
||||
'defines': [
|
||||
'LIBYUV_MSA'
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
'defines': [
|
||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||
|
||||
@ -161,6 +161,38 @@ int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reports whether a MIPS Application Specific Extension (ASE) is available.
//
// cpuinfo_name: path of a /proc/cpuinfo style text file.
// ase: token to look for, including its leading space (" msa" or " dspr2").
//
// Returns kCpuHasMSA for " msa" or kCpuHasDSPR2 for " dspr2" when the token
// appears on the "ASEs implemented" line followed by a space or a newline.
// If the file cannot be opened the requested ASE is assumed to be present.
// Returns 0 when the token is not found or when 'ase' is not one of the two
// recognised names.
LIBYUV_API SAFEBUFFERS
int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
  char cpuinfo_line[512];
  size_t len = strlen(ase);
  int flag = 0;
  FILE* f;

  // Resolve the flag once so that every exit path agrees, and so that an
  // unrecognised name can never fall through to code that touches the file.
  if (strcmp(ase, " msa") == 0) {
    flag = kCpuHasMSA;
  } else if (strcmp(ase, " dspr2") == 0) {
    flag = kCpuHasDSPR2;
  }

  f = fopen(cpuinfo_name, "r");
  if (!f) {
    // ase enabled if /proc/cpuinfo is unavailable.
    return flag;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
    // strncmp stops at the terminator, so a line shorter than the 16
    // character prefix never causes a read past the text fgets stored.
    if (strncmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
      char* p = strstr(cpuinfo_line, ase);
      // Require a delimiter after the token so that a longer name sharing
      // the same prefix is not accepted.
      if (p && (p[len] == ' ' || p[len] == '\n')) {
        fclose(f);
        return flag;
      }
    }
  }
  fclose(f);
  return 0;
}
|
||||
|
||||
// CPU detect function for SIMD instruction sets.
|
||||
LIBYUV_API
|
||||
int cpu_info_ = 0; // cpu_info is not initialized yet.
|
||||
@ -253,11 +285,17 @@ int InitCpuFlags(void) {
|
||||
#if defined(__mips__) && defined(__linux__)
|
||||
#if defined(__mips_dspr2)
|
||||
cpu_info |= kCpuHasDSPR2;
|
||||
#endif
|
||||
#if defined(__mips_msa)
|
||||
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
|
||||
#endif
|
||||
cpu_info |= kCpuHasMIPS;
|
||||
if (getenv("LIBYUV_DISABLE_DSPR2")) {
|
||||
cpu_info &= ~kCpuHasDSPR2;
|
||||
}
|
||||
if (getenv("LIBYUV_DISABLE_MSA")) {
|
||||
cpu_info &= ~kCpuHasMSA;
|
||||
}
|
||||
#endif
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// gcc -mfpu=neon defines __ARM_NEON__
|
||||
|
||||
@ -401,6 +401,14 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
|
||||
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
|
||||
MirrorRow = MirrorRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
MirrorRow = MirrorRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 64)) {
|
||||
MirrorRow = MirrorRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Mirror plane
|
||||
|
||||
@ -141,6 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
MirrorRow = MirrorRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
MirrorRow = MirrorRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 64)) {
|
||||
MirrorRow = MirrorRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
|
||||
|
||||
@ -631,6 +631,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
||||
#ifdef HAS_MIRRORROW_NEON
|
||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_MIRRORROW_MSA
|
||||
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
|
||||
#endif
|
||||
#ifdef HAS_ARGBMIRRORROW_AVX2
|
||||
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
|
||||
#endif
|
||||
|
||||
45
source/row_msa.cc
Normal file
45
source/row_msa.cc
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include "libyuv/macros_msa.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
|
||||
int count;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v16u8 dst0, dst1, dst2, dst3;
|
||||
v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
|
||||
|
||||
src += width - 64;
|
||||
|
||||
for (count = 0; count < width; count += 64) {
|
||||
LD_UB4(src, 16, src3, src2, src1, src0);
|
||||
VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
|
||||
VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
|
||||
dst += 64;
|
||||
src -= 64;
|
||||
}
|
||||
}
|
||||
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user