mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Add MIPS SIMD Arch (MSA) optimized MirrorRow function
Following the preparation patch added to the Chromium sources (2150943003: Add MIPS SIMD Arch (MSA) build flags for GYP/GN builds), this patch adds the first MSA-optimized function to the libyuv project. BUG=libyuv:634 R=fbarchard@google.com Review URL: https://codereview.chromium.org/2285683002 .
This commit is contained in:
parent
5da918b48d
commit
c5323b0fdc
@ -53,6 +53,12 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
|||||||
source/scale_neon.cc.neon
|
source/scale_neon.cc.neon
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET_ARCH_ABI),mips)
|
||||||
|
LOCAL_CFLAGS += -DLIBYUV_MSA
|
||||||
|
LOCAL_SRC_FILES += \
|
||||||
|
source/row_msa.cc
|
||||||
|
endif
|
||||||
|
|
||||||
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
|
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
|
||||||
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
|
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
|
||||||
|
|
||||||
|
|||||||
15
BUILD.gn
15
BUILD.gn
@ -94,6 +94,10 @@ static_library("libyuv") {
|
|||||||
deps += [ ":libyuv_neon" ]
|
deps += [ ":libyuv_neon" ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (libyuv_use_msa) {
|
||||||
|
deps += [ ":libyuv_msa" ]
|
||||||
|
}
|
||||||
|
|
||||||
if (is_nacl) {
|
if (is_nacl) {
|
||||||
# Always enable optimization under NaCl to workaround crbug.com/538243 .
|
# Always enable optimization under NaCl to workaround crbug.com/538243 .
|
||||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||||
@ -124,6 +128,17 @@ if (libyuv_use_neon) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (libyuv_use_msa) {
|
||||||
|
static_library("libyuv_msa") {
|
||||||
|
sources = [
|
||||||
|
# MSA Source Files
|
||||||
|
"source/row_msa.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
public_configs = [ ":libyuv_config" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (libyuv_include_tests) {
|
if (libyuv_include_tests) {
|
||||||
config("libyuv_unittest_warnings_config") {
|
config("libyuv_unittest_warnings_config") {
|
||||||
if (!is_win) {
|
if (!is_win) {
|
||||||
|
|||||||
@ -40,6 +40,7 @@ set(ly_source_files
|
|||||||
${ly_src_dir}/row_any.cc
|
${ly_src_dir}/row_any.cc
|
||||||
${ly_src_dir}/row_common.cc
|
${ly_src_dir}/row_common.cc
|
||||||
${ly_src_dir}/row_mips.cc
|
${ly_src_dir}/row_mips.cc
|
||||||
|
${ly_src_dir}/row_msa.cc
|
||||||
${ly_src_dir}/row_neon.cc
|
${ly_src_dir}/row_neon.cc
|
||||||
${ly_src_dir}/row_neon64.cc
|
${ly_src_dir}/row_neon64.cc
|
||||||
${ly_src_dir}/row_gcc.cc
|
${ly_src_dir}/row_gcc.cc
|
||||||
@ -80,6 +81,7 @@ set(ly_header_files
|
|||||||
${ly_inc_dir}/libyuv/convert_from.h
|
${ly_inc_dir}/libyuv/convert_from.h
|
||||||
${ly_inc_dir}/libyuv/convert_from_argb.h
|
${ly_inc_dir}/libyuv/convert_from_argb.h
|
||||||
${ly_inc_dir}/libyuv/cpu_id.h
|
${ly_inc_dir}/libyuv/cpu_id.h
|
||||||
|
${ly_inc_dir}/libyuv/macros_msa.h
|
||||||
${ly_inc_dir}/libyuv/planar_functions.h
|
${ly_inc_dir}/libyuv/planar_functions.h
|
||||||
${ly_inc_dir}/libyuv/rotate.h
|
${ly_inc_dir}/libyuv/rotate.h
|
||||||
${ly_inc_dir}/libyuv/rotate_argb.h
|
${ly_inc_dir}/libyuv/rotate_argb.h
|
||||||
|
|||||||
@ -195,6 +195,16 @@ Running test with C code:
|
|||||||
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
|
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
|
||||||
ninja -C out/Official
|
ninja -C out/Official
|
||||||
|
|
||||||
|
#### Building mips with GN
|
||||||
|
|
||||||
|
mipsel
|
||||||
|
gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||||
|
ninja -C out/Default
|
||||||
|
|
||||||
|
mips64el
|
||||||
|
gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||||
|
ninja -C out/Default
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
|
|
||||||
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
|
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
|
||||||
|
|||||||
@ -42,6 +42,7 @@ static const int kCpuHasAVX3 = 0x2000;
|
|||||||
// These flags are only valid on MIPS processors.
|
// These flags are only valid on MIPS processors.
|
||||||
static const int kCpuHasMIPS = 0x10000;
|
static const int kCpuHasMIPS = 0x10000;
|
||||||
static const int kCpuHasDSPR2 = 0x20000;
|
static const int kCpuHasDSPR2 = 0x20000;
|
||||||
|
static const int kCpuHasMSA = 0x40000;
|
||||||
|
|
||||||
// Internal function used to auto-init.
|
// Internal function used to auto-init.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
78
include/libyuv/macros_msa.h
Normal file
78
include/libyuv/macros_msa.h
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __MACROS_MSA_H__
|
||||||
|
#define __MACROS_MSA_H__
|
||||||
|
|
||||||
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <msa.h>
|
||||||
|
|
||||||
|
/* Description : Load one vector of type RTYPE
   Arguments   : Inputs  - psrc
                 Return Type - as per RTYPE
   Details     : Dereferences 'psrc' as a pointer to RTYPE. The expansion is
                 fully parenthesized so it can be used inside larger
                 expressions.
*/
#define LD_B(RTYPE, psrc) (*((RTYPE*)(psrc)))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)

/* Description : Store one vector of type RTYPE
   Arguments   : Inputs  - in, pdst
   Details     : Writes 'in' through 'pdst' treated as a pointer to RTYPE
*/
#define ST_B(RTYPE, in, pdst) (*((RTYPE*)(pdst)) = (in))
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)

/* Description : Load two vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load 16 byte elements in 'out0' from (psrc)
                 Load 16 byte elements in 'out1' from (psrc + stride)
   Note        : 'psrc' and 'stride' are expanded more than once, so they
                 must not have side effects. The pointer arithmetic is done
                 in units of the pointee type of 'psrc'.
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) do {  \
    (out0) = LD_B(RTYPE, (psrc));                    \
    (out1) = LD_B(RTYPE, (psrc) + (stride));         \
  } while (0)
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)

/* Description : Load four vectors with 16 'byte' sized elements
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1, out2, out3
                 Return Type - as per RTYPE
   Details     : Load 'out0'..'out3' from (psrc), (psrc + stride),
                 (psrc + 2 * stride) and (psrc + 3 * stride)
*/
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) do {  \
    LD_B2(RTYPE, (psrc), (stride), out0, out1);                  \
    LD_B2(RTYPE, (psrc) + 2 * (stride), (stride), out2, out3);   \
  } while (0)
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)

/* Description : Store two vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs  - in0, in1, pdst, stride
   Details     : Store 16 byte elements from 'in0' to (pdst)
                 Store 16 byte elements from 'in1' to (pdst + stride)
   Note        : 'pdst' and 'stride' are expanded more than once, so they
                 must not have side effects.
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) do {  \
    ST_B(RTYPE, in0, (pdst));                      \
    ST_B(RTYPE, in1, (pdst) + (stride));           \
  } while (0)
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)

/* Description : Store four vectors with stride each having 16 'byte' sized
                 elements
   Arguments   : Inputs  - in0, in1, in2, in3, pdst, stride
   Details     : Store 'in0'..'in3' to (pdst), (pdst + stride),
                 (pdst + 2 * stride) and (pdst + 3 * stride)
*/
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) do {  \
    ST_B2(RTYPE, in0, in1, (pdst), (stride));                \
    ST_B2(RTYPE, in2, in3, (pdst) + 2 * (stride), (stride)); \
  } while (0)
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)

/* Description : Shuffle byte vector elements as per mask vector
   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0' & 'in1' are copied selectively to
                 'out0' as per control vector 'mask0'
                 Byte elements from 'in2' & 'in3' are copied selectively to
                 'out1' as per control vector 'mask1'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) do {   \
    (out0) = (RTYPE) __msa_vshf_b((v16i8)(mask0), (v16i8)(in1),             \
                                  (v16i8)(in0));                            \
    (out1) = (RTYPE) __msa_vshf_b((v16i8)(mask1), (v16i8)(in3),             \
                                  (v16i8)(in2));                            \
  } while (0)
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
|
||||||
|
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||||
|
#endif /* __MACROS_MSA_H__ */
|
||||||
@ -372,6 +372,10 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
#define HAS_MIRRORROW_MSA
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||||
#if defined(VISUALC_HAS_AVX2)
|
#if defined(VISUALC_HAS_AVX2)
|
||||||
#define SIMD_ALIGNED(var) __declspec(align(32)) var
|
#define SIMD_ALIGNED(var) __declspec(align(32)) var
|
||||||
@ -809,11 +813,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
|
|||||||
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
|
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
|
||||||
|
void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_C(const uint8* src, uint8* dst, int width);
|
void MirrorRow_C(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
|
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
|
void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
|
void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
|
||||||
void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
|
void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
|
||||||
|
void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
|
||||||
|
|
||||||
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||||
int width);
|
int width);
|
||||||
|
|||||||
@ -8,10 +8,13 @@
|
|||||||
|
|
||||||
import("//build_overrides/build.gni")
|
import("//build_overrides/build.gni")
|
||||||
import("//build/config/arm.gni")
|
import("//build/config/arm.gni")
|
||||||
|
import("//build/config/mips.gni")
|
||||||
|
|
||||||
declare_args() {
|
declare_args() {
|
||||||
libyuv_include_tests = !build_with_chromium
|
libyuv_include_tests = !build_with_chromium
|
||||||
libyuv_disable_jpeg = false
|
libyuv_disable_jpeg = false
|
||||||
libyuv_use_neon = (current_cpu == "arm64" ||
|
libyuv_use_neon = (current_cpu == "arm64" ||
|
||||||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
|
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
|
||||||
|
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
|
||||||
|
mips_use_msa
|
||||||
}
|
}
|
||||||
|
|||||||
11
libyuv.gyp
11
libyuv.gyp
@ -26,12 +26,18 @@
|
|||||||
# Link-Time Optimizations.
|
# Link-Time Optimizations.
|
||||||
'use_lto%': 0,
|
'use_lto%': 0,
|
||||||
'build_neon': 0,
|
'build_neon': 0,
|
||||||
|
'build_msa': 0,
|
||||||
'conditions': [
|
'conditions': [
|
||||||
['(target_arch == "armv7" or target_arch == "armv7s" or \
|
['(target_arch == "armv7" or target_arch == "armv7s" or \
|
||||||
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
|
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
|
||||||
and (arm_neon == 1 or arm_neon_optional == 1)', {
|
and (arm_neon == 1 or arm_neon_optional == 1)', {
|
||||||
'build_neon': 1,
|
'build_neon': 1,
|
||||||
}],
|
}],
|
||||||
|
['(target_arch == "mipsel" or target_arch == "mips64el")\
|
||||||
|
and (mips_msa == 1)',
|
||||||
|
{
|
||||||
|
'build_msa': 1,
|
||||||
|
}],
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -79,6 +85,11 @@
|
|||||||
}],
|
}],
|
||||||
],
|
],
|
||||||
}],
|
}],
|
||||||
|
['build_msa != 0', {
|
||||||
|
'defines': [
|
||||||
|
'LIBYUV_MSA',
|
||||||
|
],
|
||||||
|
}],
|
||||||
['OS != "ios" and libyuv_disable_jpeg != 1', {
|
['OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||||
'defines': [
|
'defines': [
|
||||||
'HAVE_JPEG'
|
'HAVE_JPEG'
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
'include/libyuv/convert_from.h',
|
'include/libyuv/convert_from.h',
|
||||||
'include/libyuv/convert_from_argb.h',
|
'include/libyuv/convert_from_argb.h',
|
||||||
'include/libyuv/cpu_id.h',
|
'include/libyuv/cpu_id.h',
|
||||||
|
'include/libyuv/macros_msa.h',
|
||||||
'include/libyuv/mjpeg_decoder.h',
|
'include/libyuv/mjpeg_decoder.h',
|
||||||
'include/libyuv/planar_functions.h',
|
'include/libyuv/planar_functions.h',
|
||||||
'include/libyuv/rotate.h',
|
'include/libyuv/rotate.h',
|
||||||
@ -61,6 +62,7 @@
|
|||||||
'source/row_common.cc',
|
'source/row_common.cc',
|
||||||
'source/row_gcc.cc',
|
'source/row_gcc.cc',
|
||||||
'source/row_mips.cc',
|
'source/row_mips.cc',
|
||||||
|
'source/row_msa.cc',
|
||||||
'source/row_neon.cc',
|
'source/row_neon.cc',
|
||||||
'source/row_neon64.cc',
|
'source/row_neon64.cc',
|
||||||
'source/row_win.cc',
|
'source/row_win.cc',
|
||||||
|
|||||||
@ -86,6 +86,12 @@
|
|||||||
'LIBYUV_NEON'
|
'LIBYUV_NEON'
|
||||||
],
|
],
|
||||||
}],
|
}],
|
||||||
|
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
|
||||||
|
and (mips_msa == 1)', {
|
||||||
|
'defines': [
|
||||||
|
'LIBYUV_MSA'
|
||||||
|
],
|
||||||
|
}],
|
||||||
], # conditions
|
], # conditions
|
||||||
'defines': [
|
'defines': [
|
||||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||||
|
|||||||
@ -161,6 +161,38 @@ int ArmCpuCaps(const char* cpuinfo_name) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scans the cpuinfo-style text file |cpuinfo_name| for a line starting with
// "ASEs implemented" and reports whether the MIPS ASE named by |ase| is
// listed on it.
// |ase| is matched together with its leading space and must be " msa" or
// " dspr2"; any other value returns 0.
// Returns kCpuHasMSA or kCpuHasDSPR2 when the ASE is listed, or when the file
// cannot be opened (the ASE is then assumed to be present). Returns 0 when
// the ASE is not listed.
LIBYUV_API SAFEBUFFERS
int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
  char cpuinfo_line[512];
  int len = (int)strlen(ase);
  int flag = 0;
  FILE* f;

  // Resolve the flag first so an unrecognized |ase| can never reach the read
  // loop with a NULL stream, nor keep reading a stream that was just closed.
  if (strcmp(ase, " msa") == 0) {
    flag = kCpuHasMSA;
  } else if (strcmp(ase, " dspr2") == 0) {
    flag = kCpuHasDSPR2;
  } else {
    return 0;
  }

  f = fopen(cpuinfo_name, "r");
  if (!f) {
    // ase enabled if /proc/cpuinfo is unavailable.
    return flag;
  }
  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
    // strncmp stops at the terminator, so short lines are never read past
    // their end.
    if (strncmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
      const char* p;
      // Check every occurrence: an earlier hit may only be a prefix of a
      // longer ASE name and fail the delimiter test.
      for (p = strstr(cpuinfo_line, ase); p; p = strstr(p + 1, ase)) {
        if (p[len] == ' ' || p[len] == '\n') {
          fclose(f);
          return flag;
        }
      }
    }
  }
  fclose(f);
  return 0;
}
|
||||||
|
|
||||||
// CPU detect function for SIMD instruction sets.
|
// CPU detect function for SIMD instruction sets.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int cpu_info_ = 0; // cpu_info is not initialized yet.
|
int cpu_info_ = 0; // cpu_info is not initialized yet.
|
||||||
@ -253,11 +285,17 @@ int InitCpuFlags(void) {
|
|||||||
#if defined(__mips__) && defined(__linux__)
|
#if defined(__mips__) && defined(__linux__)
|
||||||
#if defined(__mips_dspr2)
|
#if defined(__mips_dspr2)
|
||||||
cpu_info |= kCpuHasDSPR2;
|
cpu_info |= kCpuHasDSPR2;
|
||||||
|
#endif
|
||||||
|
#if defined(__mips_msa)
|
||||||
|
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
|
||||||
#endif
|
#endif
|
||||||
cpu_info |= kCpuHasMIPS;
|
cpu_info |= kCpuHasMIPS;
|
||||||
if (getenv("LIBYUV_DISABLE_DSPR2")) {
|
if (getenv("LIBYUV_DISABLE_DSPR2")) {
|
||||||
cpu_info &= ~kCpuHasDSPR2;
|
cpu_info &= ~kCpuHasDSPR2;
|
||||||
}
|
}
|
||||||
|
if (getenv("LIBYUV_DISABLE_MSA")) {
|
||||||
|
cpu_info &= ~kCpuHasMSA;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(__arm__) || defined(__aarch64__)
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
// gcc -mfpu=neon defines __ARM_NEON__
|
// gcc -mfpu=neon defines __ARM_NEON__
|
||||||
|
|||||||
@ -401,6 +401,14 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
|
|||||||
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
|
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
|
||||||
MirrorRow = MirrorRow_DSPR2;
|
MirrorRow = MirrorRow_DSPR2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(HAS_MIRRORROW_MSA)
|
||||||
|
if (TestCpuFlag(kCpuHasMSA)) {
|
||||||
|
MirrorRow = MirrorRow_Any_MSA;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
MirrorRow = MirrorRow_MSA;
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Mirror plane
|
// Mirror plane
|
||||||
|
|||||||
@ -141,6 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride,
|
|||||||
MirrorRow = MirrorRow_DSPR2;
|
MirrorRow = MirrorRow_DSPR2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_MIRRORROW_MSA)
|
||||||
|
if (TestCpuFlag(kCpuHasMSA)) {
|
||||||
|
MirrorRow = MirrorRow_Any_MSA;
|
||||||
|
if (IS_ALIGNED(width, 64)) {
|
||||||
|
MirrorRow = MirrorRow_MSA;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#if defined(HAS_COPYROW_SSE2)
|
#if defined(HAS_COPYROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
|
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
|
||||||
|
|||||||
@ -631,6 +631,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
|||||||
#ifdef HAS_MIRRORROW_NEON
|
#ifdef HAS_MIRRORROW_NEON
|
||||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_MIRRORROW_MSA
|
||||||
|
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
|
||||||
|
#endif
|
||||||
#ifdef HAS_ARGBMIRRORROW_AVX2
|
#ifdef HAS_ARGBMIRRORROW_AVX2
|
||||||
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
|
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
45
source/row_msa.cc
Normal file
45
source/row_msa.cc
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libyuv/row.h"
|
||||||
|
|
||||||
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
#include "libyuv/macros_msa.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
namespace libyuv {
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
|
||||||
|
int count;
|
||||||
|
v16u8 src0, src1, src2, src3;
|
||||||
|
v16u8 dst0, dst1, dst2, dst3;
|
||||||
|
v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
|
||||||
|
|
||||||
|
src += width - 64;
|
||||||
|
|
||||||
|
for (count = 0; count < width; count += 64) {
|
||||||
|
LD_UB4(src, 16, src3, src2, src1, src0);
|
||||||
|
VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
|
||||||
|
VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
|
||||||
|
ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
|
||||||
|
dst += 64;
|
||||||
|
src -= 64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
} // namespace libyuv
|
||||||
|
#endif
|
||||||
Loading…
x
Reference in New Issue
Block a user