mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 17:26:49 +08:00
Optimize functions for LASX in row_lasx.cc.
1. Optimize 18 functions in source/row_lasx.cc file. 2. Make small modifications to LSX. 3. Remove some unnecessary content. Bug: libyuv:912 Change-Id: Ifd1d85366efb9cdb3b99491e30fa450ff1848661 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3507640 Reviewed-by: Mirko Bonadei <mbonadei@chromium.org> Reviewed-by: Frank Barchard <fbarchard@chromium.org> Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
42d76a342f
commit
91bae707e1
@ -78,8 +78,6 @@ LIBYUV_API
|
||||
int ArmCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int MipsCpuCaps(const char* cpuinfo_name);
|
||||
LIBYUV_API
|
||||
int LoongarchCpuCaps(void);
|
||||
|
||||
// For testing, allow CPU flags to be disabled.
|
||||
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
* Xiwei Gu <guxiwei-hf@loongson.cn>
|
||||
* Lu Wang <wanglu@loongson.cn>
|
||||
*
|
||||
* This file is a header file for loongarch builtin extention.
|
||||
* This file is a header file for loongarch builtin extension.
|
||||
*
|
||||
*/
|
||||
|
||||
@ -27,12 +27,12 @@
|
||||
|
||||
/**
|
||||
* MAJOR version: Macro usage changes.
|
||||
* MINOR version: Add new functions, or bug fix.
|
||||
* MINOR version: Add new functions, or bug fixes.
|
||||
* MICRO version: Comment changes or implementation changes.
|
||||
*/
|
||||
#define LSOM_VERSION_MAJOR 1
|
||||
#define LSOM_VERSION_MINOR 0
|
||||
#define LSOM_VERSION_MICRO 3
|
||||
#define LSOM_VERSION_MINOR 1
|
||||
#define LSOM_VERSION_MICRO 0
|
||||
|
||||
#define DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1) \
|
||||
{ \
|
||||
@ -79,11 +79,11 @@
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Signed byte elements from in_h are multiplied by
|
||||
* signed byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
* Then the results plus to signed half word elements from in_c.
|
||||
* Then the results plus to signed half-word elements from in_c.
|
||||
* Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l)
|
||||
* in_c : 1,2,3,4, 1,2,3,4
|
||||
* in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
@ -91,8 +91,7 @@
|
||||
* out : 23,40,41,26, 23,40,41,26
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m128i __lsx_vdp2add_h_b(__m128i in_c,
|
||||
__m128i in_h,
|
||||
static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, __m128i in_h,
|
||||
__m128i in_l) {
|
||||
__m128i out;
|
||||
|
||||
@ -106,20 +105,19 @@ static inline __m128i __lsx_vdp2add_h_b(__m128i in_c,
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied by
|
||||
* unsigned byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
* The results plus to signed half word elements from in_c.
|
||||
* Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l)
|
||||
* The results plus to signed half-word elements from in_c.
|
||||
* Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l)
|
||||
* in_c : 1,2,3,4, 1,2,3,4
|
||||
* in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
|
||||
* out : 23,40,41,26, 23,40,41,26
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c,
|
||||
__m128i in_h,
|
||||
static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c, __m128i in_h,
|
||||
__m128i in_l) {
|
||||
__m128i out;
|
||||
|
||||
@ -130,12 +128,38 @@ static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c,
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Dot product & addition of half word vector elements
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - __m128i
|
||||
* Details : Signed half word elements from in_h are multiplied by
|
||||
* signed half word elements from in_l, and then added adjacent to
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied by
|
||||
* signed byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
* The results plus to signed half-word elements from in_c.
|
||||
* Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l)
|
||||
* in_c : 1,1,1,1, 1,1,1,1
|
||||
* in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8
|
||||
* out : -4,-24,-60,-112, 6,26,62,114
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, __m128i in_h,
|
||||
__m128i in_l) {
|
||||
__m128i out;
|
||||
|
||||
/* Step 1: start from the in_c accumulator and add the widened products of
 * one half of the byte pairs (the "ev" variant; presumably the even-indexed
 * bytes -- confirm against the LSX intrinsic reference). Per the _bu_b
 * suffix and the header comment, in_h is unsigned and in_l is signed. */
out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l);
|
||||
/* Step 2: accumulate the remaining ("od") products on top of the step-1
 * result, so each halfword lane ends up as in_c plus both adjacent
 * products (matches the worked example in the header comment). */
out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l);
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Dot product & addition of half-word vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Outputs - out
|
||||
* Return Type - __m128i
|
||||
* Details : Signed half-word elements from in_h are multiplied by
|
||||
* signed half-word elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
* Then the results plus to signed word elements from in_c.
|
||||
* Example : out = __lsx_vdp2add_w_h(in_c, in_h, in_l)
|
||||
@ -145,8 +169,7 @@ static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c,
|
||||
* out : 23,40,41,26
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m128i __lsx_vdp2add_w_h(__m128i in_c,
|
||||
__m128i in_h,
|
||||
static inline __m128i __lsx_vdp2add_w_h(__m128i in_c, __m128i in_h,
|
||||
__m128i in_l) {
|
||||
__m128i out;
|
||||
|
||||
@ -160,7 +183,7 @@ static inline __m128i __lsx_vdp2add_w_h(__m128i in_c,
|
||||
* Description : Dot product of byte vector elements
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Signed byte elements from in_h are multiplied by
|
||||
* signed byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
@ -183,7 +206,7 @@ static inline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) {
|
||||
* Description : Dot product of byte vector elements
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied by
|
||||
* unsigned byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
@ -206,7 +229,7 @@ static inline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) {
|
||||
* Description : Dot product of byte vector elements
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied by
|
||||
* signed byte elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
@ -229,7 +252,7 @@ static inline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) {
|
||||
* Description : Dot product of halfword vector elements
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - word
|
||||
* Details : Signed halfword elements from in_h are multiplied by
|
||||
* signed halfword elements from in_l, and then added adjacent to
|
||||
* each other to get results with the twice size of input.
|
||||
@ -251,7 +274,8 @@ static inline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) {
|
||||
* =============================================================================
|
||||
* Description : Clip all halfword elements of input vector between min & max
|
||||
* out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) :
|
||||
* (_in)) Arguments : Inputs - _in (input vector)
|
||||
* (_in))
|
||||
* Arguments : Inputs - _in (input vector)
|
||||
* - min (min threshold)
|
||||
* - max (max threshold)
|
||||
* Outputs - out (output vector with clipped elements)
|
||||
@ -276,7 +300,7 @@ static inline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) {
|
||||
* Description : Set each element of vector between 0 and 255
|
||||
* Arguments : Inputs - _in
|
||||
* Outputs - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Signed halfword elements from _in are clamped between 0 and 255.
|
||||
* Example : out = __lsx_vclip255_h(_in)
|
||||
* _in : -8,255,280,249, -8,255,280,249
|
||||
@ -296,7 +320,7 @@ static inline __m128i __lsx_vclip255_h(__m128i _in) {
|
||||
* Description : Set each element of vector between 0 and 255
|
||||
* Arguments : Inputs - _in
|
||||
* Outputs - out
|
||||
* Retrun Type - word
|
||||
* Return Type - word
|
||||
* Details : Signed word elements from _in are clamped between 0 and 255.
|
||||
* Example : out = __lsx_vclip255_w(_in)
|
||||
* _in : -8,255,280,249
|
||||
@ -363,16 +387,18 @@ static inline __m128i __lsx_vclip255_w(__m128i _in) {
|
||||
* Description : Transpose 8x8 block with byte elements in vectors
|
||||
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
|
||||
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
|
||||
* _out7 Details : The rows of the matrix become columns, and the columns
|
||||
* become rows. Example : LSX_TRANSPOSE8x8_B _in0 : 00,01,02,03,04,05,06,07,
|
||||
* 00,00,00,00,00,00,00,00 _in1 : 10,11,12,13,14,15,16,17,
|
||||
* 00,00,00,00,00,00,00,00 _in2 : 20,21,22,23,24,25,26,27,
|
||||
* 00,00,00,00,00,00,00,00 _in3 : 30,31,32,33,34,35,36,37,
|
||||
* 00,00,00,00,00,00,00,00 _in4 : 40,41,42,43,44,45,46,47,
|
||||
* 00,00,00,00,00,00,00,00 _in5 : 50,51,52,53,54,55,56,57,
|
||||
* 00,00,00,00,00,00,00,00 _in6 : 60,61,62,63,64,65,66,67,
|
||||
* 00,00,00,00,00,00,00,00 _in7 : 70,71,72,73,74,75,76,77,
|
||||
* 00,00,00,00,00,00,00,00
|
||||
* _out7
|
||||
* Details : The rows of the matrix become columns, and the columns
|
||||
* become rows.
|
||||
* Example : LSX_TRANSPOSE8x8_B
|
||||
* _in0 : 00,01,02,03,04,05,06,07, 00,00,00,00,00,00,00,00
|
||||
* _in1 : 10,11,12,13,14,15,16,17, 00,00,00,00,00,00,00,00
|
||||
* _in2 : 20,21,22,23,24,25,26,27, 00,00,00,00,00,00,00,00
|
||||
* _in3 : 30,31,32,33,34,35,36,37, 00,00,00,00,00,00,00,00
|
||||
* _in4 : 40,41,42,43,44,45,46,47, 00,00,00,00,00,00,00,00
|
||||
* _in5 : 50,51,52,53,54,55,56,57, 00,00,00,00,00,00,00,00
|
||||
* _in6 : 60,61,62,63,64,65,66,67, 00,00,00,00,00,00,00,00
|
||||
* _in7 : 70,71,72,73,74,75,76,77, 00,00,00,00,00,00,00,00
|
||||
*
|
||||
* _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00
|
||||
* _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00
|
||||
@ -388,8 +414,8 @@ static inline __m128i __lsx_vclip255_w(__m128i _in) {
|
||||
_out0, _out1, _out2, _out3, _out4, _out5, _out6, \
|
||||
_out7) \
|
||||
{ \
|
||||
__m128i zero = {0}; \
|
||||
__m128i shuf8 = {0x0F0E0D0C0B0A0908, 0x1716151413121110}; \
|
||||
__m128i zero = { 0 }; \
|
||||
__m128i shuf8 = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 }; \
|
||||
__m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \
|
||||
\
|
||||
_t0 = __lsx_vilvl_b(_in2, _in0); \
|
||||
@ -412,7 +438,7 @@ static inline __m128i __lsx_vclip255_w(__m128i _in) {
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Transpose 8x8 block with half word elements in vectors
|
||||
* Description : Transpose 8x8 block with half-word elements in vectors
|
||||
* Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
|
||||
* Outputs - out0, out1, out2, out3, out4, out5, out6, out7
|
||||
* Details :
|
||||
@ -467,15 +493,16 @@ static inline __m128i __lsx_vclip255_w(__m128i _in) {
|
||||
* Outputs - _out0, _out1, _out2, _out3 (output 4x8 byte block)
|
||||
* Return Type - as per RTYPE
|
||||
* Details : The rows of the matrix become columns, and the columns become
|
||||
* rows. Example : LSX_TRANSPOSE8x4_B _in0 : 00,01,02,03,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in1 : 10,11,12,13,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in2 : 20,21,22,23,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in3 : 30,31,32,33,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in4 : 40,41,42,43,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in5 : 50,51,52,53,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in6 : 60,61,62,63,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00 _in7 : 70,71,72,73,00,00,00,00,
|
||||
* 00,00,00,00,00,00,00,00
|
||||
* rows.
|
||||
* Example : LSX_TRANSPOSE8x4_B
|
||||
* _in0 : 00,01,02,03,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in1 : 10,11,12,13,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in2 : 20,21,22,23,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in3 : 30,31,32,33,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in4 : 40,41,42,43,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in5 : 50,51,52,53,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in6 : 60,61,62,63,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
* _in7 : 70,71,72,73,00,00,00,00, 00,00,00,00,00,00,00,00
|
||||
*
|
||||
* _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00
|
||||
* _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00
|
||||
@ -705,7 +732,7 @@ static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) {
|
||||
* Details : Signed byte elements from in_h are multiplied with
|
||||
* signed byte elements from in_l producing a result
|
||||
* twice the size of input i.e. signed halfword.
|
||||
* Then this iniplication results of adjacent odd-even elements
|
||||
* Then the multiplication results of adjacent odd-even elements
|
||||
* are added to the out vector
|
||||
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
|
||||
* =============================================================================
|
||||
@ -748,10 +775,10 @@ static inline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) {
|
||||
* Description : Dot product of word vector elements
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Output - out
|
||||
* Retrun Type - signed double
|
||||
* Return Type - signed double
|
||||
* Details : Signed word elements from in_h are multiplied with
|
||||
* signed word elements from in_l producing a result
|
||||
* twice the size of input i.e. signed double word.
|
||||
* twice the size of input i.e. signed double-word.
|
||||
* Then the multiplied results of adjacent odd-even elements
|
||||
* are added to the out vector.
|
||||
* Example : See out = __lasx_xvdp2_w_h(in_h, in_l)
|
||||
@ -792,7 +819,7 @@ static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) {
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Output - out
|
||||
* Retrun Type - halfword
|
||||
* Return Type - halfword
|
||||
* Details : Signed byte elements from in_h are multiplied with
|
||||
* signed byte elements from in_l producing a result
|
||||
* twice the size of input i.e. signed halfword.
|
||||
@ -801,8 +828,7 @@ static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) {
|
||||
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -811,6 +837,52 @@ static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c,
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Output - out
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied with
|
||||
* unsigned byte elements from in_l producing a result
|
||||
* twice the size of input i.e. signed halfword.
|
||||
* Then the multiplied results of adjacent odd-even elements
|
||||
* are added to the in_c vector.
|
||||
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
/* Step 1: start from the in_c accumulator and add the widened products of
 * one half of the byte pairs (the "ev" variant; presumably the even-indexed
 * bytes -- confirm against the LASX intrinsic reference). Both in_h and
 * in_l are treated as unsigned bytes, per the _bu suffix. */
out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l);
|
||||
/* Step 2: accumulate the remaining ("od") products on top of the step-1
 * result, completing the adjacent-pair dot product plus in_c. */
out = __lasx_xvmaddwod_h_bu(out, in_h, in_l);
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Dot product & addition of byte vector elements
|
||||
* Arguments : Inputs - in_c, in_h, in_l
|
||||
* Output - out
|
||||
* Return Type - halfword
|
||||
* Details : Unsigned byte elements from in_h are multiplied with
|
||||
* signed byte elements from in_l producing a result
|
||||
* twice the size of input i.e. signed halfword.
|
||||
* Then the multiplied results of adjacent odd-even elements
|
||||
* are added to the in_c vector.
|
||||
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
/* Step 1: start from the in_c accumulator and add the widened products of
 * one half of the byte pairs (the "ev" variant; presumably the even-indexed
 * bytes -- confirm against the LASX intrinsic reference). Per the _bu_b
 * suffix and the header comment, in_h is unsigned and in_l is signed. */
out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l);
|
||||
/* Step 2: accumulate the remaining ("od") products on top of the step-1
 * result, completing the adjacent-pair dot product plus in_c. */
out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l);
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Dot product of halfword vector elements
|
||||
@ -829,8 +901,7 @@ static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c,
|
||||
* out : 23,40,41,26, 23,40,41,26
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -853,8 +924,7 @@ static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c,
|
||||
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -877,8 +947,7 @@ static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c,
|
||||
* Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -902,8 +971,7 @@ static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c,
|
||||
* Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -932,8 +1000,7 @@ static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c,
|
||||
* out : -7,-3,0,0, 0,-1,0,-1
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i out;
|
||||
|
||||
@ -949,10 +1016,10 @@ static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c,
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Output - out
|
||||
* Return Type - signed doubleword
|
||||
* Details : Signed halfword elements from in_h are iniplied with
|
||||
* Details : Signed halfword elements from in_h are multiplied with
|
||||
* signed halfword elements from in_l producing a result
|
||||
* four times the size of input i.e. signed doubleword.
|
||||
* Then this iniplication results of four adjacent elements
|
||||
* Then the multiplication results of four adjacent elements
|
||||
* are added together and stored to the out vector.
|
||||
* Example : out = __lasx_xvdp4_d_h(in_h, in_l)
|
||||
* in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1
|
||||
@ -1134,8 +1201,7 @@ static inline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) {
|
||||
* out : 201, 602,1203,2004, -995, -1794,-2793,-3992
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i tmp0, tmp1, out;
|
||||
|
||||
@ -1159,8 +1225,7 @@ static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c,
|
||||
* Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l)
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c,
|
||||
__m256i in_h,
|
||||
static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, __m256i in_h,
|
||||
__m256i in_l) {
|
||||
__m256i tmp0, tmp1, out;
|
||||
|
||||
@ -1221,22 +1286,24 @@ static inline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) {
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : The low half of the vector elements are expanded and
|
||||
* added saturately after being doubled.
|
||||
* Description : The low half of the vector elements are added to the high half
|
||||
* after being doubled, then saturated.
|
||||
* Arguments : Inputs - in_h, in_l
|
||||
* Output - out
|
||||
* Details : The in_h vector adds the in_l vector saturately after the lower
|
||||
* half of the two-fold zero extension (unsigned byte to unsigned
|
||||
* halfword) and the results are stored to the out vector.
|
||||
* Details : The in_h vector adds the in_l vector after the lower half of
|
||||
* the two-fold zero extension (unsigned byte to unsigned
|
||||
* halfword) and then saturated. The results are stored to the out
|
||||
* vector.
|
||||
* Example : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l)
|
||||
* in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1
|
||||
* in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1,
|
||||
* 0,0,0,1 out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2,
|
||||
* 0,0,0,1
|
||||
* out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2,
|
||||
* =============================================================================
|
||||
*/
|
||||
static inline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) {
|
||||
__m256i tmp1, out;
|
||||
__m256i zero = {0};
|
||||
__m256i zero = { 0 };
|
||||
|
||||
tmp1 = __lasx_xvilvl_b(zero, in_l);
|
||||
out = __lasx_xvsadd_hu(in_h, tmp1);
|
||||
@ -1308,8 +1375,8 @@ static inline __m256i __lasx_xvclip255_w(__m256i in) {
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Indexed halfword element values are replicated to all
|
||||
* elements in output vector. If 'indx < 8' use xvsplati_l_*,
|
||||
* if 'indx >= 8' use xvsplati_h_*.
|
||||
* elements in output vector. If 'idx < 8' use xvsplati_l_*,
|
||||
* if 'idx >= 8' use xvsplati_h_*.
|
||||
* Arguments : Inputs - in, idx
|
||||
* Output - out
|
||||
* Details : Idx element value from in vector is replicated to all
|
||||
@ -1332,8 +1399,8 @@ static inline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) {
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Indexed halfword element values are replicated to all
|
||||
* elements in output vector. If 'indx < 8' use xvsplati_l_*,
|
||||
* if 'indx >= 8' use xvsplati_h_*.
|
||||
* elements in output vector. If 'idx < 8' use xvsplati_l_*,
|
||||
* if 'idx >= 8' use xvsplati_h_*.
|
||||
* Arguments : Inputs - in, idx
|
||||
* Output - out
|
||||
* Details : Idx element value from in vector is replicated to all
|
||||
@ -1355,7 +1422,7 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Description : Transpose 4x4 block with double word elements in vectors
|
||||
* Description : Transpose 4x4 block with double-word elements in vectors
|
||||
* Arguments : Inputs - _in0, _in1, _in2, _in3
|
||||
* Outputs - _out0, _out1, _out2, _out3
|
||||
* Example : LASX_TRANSPOSE4x4_D
|
||||
@ -1389,10 +1456,16 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* Description : Transpose 8x8 block with word elements in vectors
|
||||
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
|
||||
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
|
||||
* _out7 Example : LASX_TRANSPOSE8x8_W _in0 : 1,2,3,4,5,6,7,8 _in1 :
|
||||
* 2,2,3,4,5,6,7,8 _in2 : 3,2,3,4,5,6,7,8 _in3 : 4,2,3,4,5,6,7,8 _in4 :
|
||||
* 5,2,3,4,5,6,7,8 _in5 : 6,2,3,4,5,6,7,8 _in6 : 7,2,3,4,5,6,7,8 _in7 :
|
||||
* 8,2,3,4,5,6,7,8
|
||||
* _out7
|
||||
* Example : LASX_TRANSPOSE8x8_W
|
||||
* _in0 : 1,2,3,4,5,6,7,8
|
||||
* _in1 : 2,2,3,4,5,6,7,8
|
||||
* _in2 : 3,2,3,4,5,6,7,8
|
||||
* _in3 : 4,2,3,4,5,6,7,8
|
||||
* _in4 : 5,2,3,4,5,6,7,8
|
||||
* _in5 : 6,2,3,4,5,6,7,8
|
||||
* _in6 : 7,2,3,4,5,6,7,8
|
||||
* _in7 : 8,2,3,4,5,6,7,8
|
||||
*
|
||||
* _out0 : 1,2,3,4,5,6,7,8
|
||||
* _out1 : 2,2,2,2,2,2,2,2
|
||||
@ -1445,8 +1518,10 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15
|
||||
* (input 16x8 byte block)
|
||||
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
|
||||
* _out7 (output 8x16 byte block) Details : The rows of the matrix become
|
||||
* columns, and the columns become rows. Example : See LASX_TRANSPOSE16x8_H
|
||||
* _out7 (output 8x16 byte block)
|
||||
* Details : The rows of the matrix become columns, and the columns become
|
||||
* rows.
|
||||
* Example : See LASX_TRANSPOSE16x8_H
|
||||
* =============================================================================
|
||||
*/
|
||||
#define LASX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
|
||||
@ -1498,13 +1573,20 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15
|
||||
* (input 16x8 byte block)
|
||||
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
|
||||
* _out7 (output 8x16 byte block) Details : The rows of the matrix become
|
||||
* columns, and the columns become rows. Example : LASX_TRANSPOSE16x8_H _in0
|
||||
* : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in2
|
||||
* : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in4
|
||||
* : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in6
|
||||
* : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in8
|
||||
* : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _out7 (output 8x16 byte block)
|
||||
* Details : The rows of the matrix become columns, and the columns become
|
||||
* rows.
|
||||
* Example : LASX_TRANSPOSE16x8_H
|
||||
* _in0 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in2 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in4 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in6 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in8 : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in10 : 0,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in11 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
* _in12 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0
|
||||
@ -1597,7 +1679,8 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* Outputs - _out0, _out1, _out2, _out3
|
||||
* Return Type - signed halfword
|
||||
* Details : The rows of the matrix become columns, and the columns become
|
||||
* rows. Example : See LASX_TRANSPOSE8x8_H
|
||||
* rows.
|
||||
* Example : See LASX_TRANSPOSE8x8_H
|
||||
* =============================================================================
|
||||
*/
|
||||
#define LASX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \
|
||||
@ -1619,7 +1702,8 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7
|
||||
* (input 8x8 byte block)
|
||||
* Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6,
|
||||
* _out7 (output 8x8 byte block) Example : See LASX_TRANSPOSE8x8_H
|
||||
* _out7 (output 8x8 byte block)
|
||||
* Example : See LASX_TRANSPOSE8x8_H
|
||||
* =============================================================================
|
||||
*/
|
||||
#define LASX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \
|
||||
@ -1652,11 +1736,16 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* Arguments : Inputs - _in0, _in1, ~
|
||||
* Outputs - _out0, _out1, ~
|
||||
* Details : The rows of the matrix become columns, and the columns become
|
||||
* rows. Example : LASX_TRANSPOSE8x8_H _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4,
|
||||
* 5,6,7,8 _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 _in2 : 8,2,3,4, 5,6,7,8,
|
||||
* 8,2,3,4, 5,6,7,8 _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 _in4 : 9,2,3,4,
|
||||
* 5,6,7,8, 9,2,3,4, 5,6,7,8 _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 _in6 :
|
||||
* 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8
|
||||
* rows.
|
||||
* Example : LASX_TRANSPOSE8x8_H
|
||||
* _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8
|
||||
* _in2 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8
|
||||
* _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* _in4 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8
|
||||
* _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* _in6 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
|
||||
* _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8
|
||||
*
|
||||
* _out0 : 1,8,8,1, 9,1,1,9, 1,8,8,1, 9,1,1,9
|
||||
* _out1 : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2
|
||||
@ -1832,14 +1921,12 @@ static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
|
||||
* VP:1,2,3,4,
|
||||
* =============================================================================
|
||||
*/
|
||||
#define VECT_PRINT(RTYPE, element_num, in0, enter) \
|
||||
{ \
|
||||
RTYPE _tmp0 = (RTYPE)in0; \
|
||||
int _i = 0; \
|
||||
if (enter) \
|
||||
printf("\nVP:"); \
|
||||
for (_i = 0; _i < element_num; _i++) \
|
||||
printf("%d,", _tmp0[_i]); \
|
||||
#define VECT_PRINT(RTYPE, element_num, in0, enter) \
|
||||
{ \
|
||||
RTYPE _tmp0 = (RTYPE)in0; \
|
||||
int _i = 0; \
|
||||
if (enter) printf("\nVP:"); \
|
||||
for (_i = 0; _i < element_num; _i++) printf("%d,", _tmp0[_i]); \
|
||||
}
|
||||
|
||||
#endif /* LOONGSON_INTRINSICS_H */
|
||||
|
||||
@ -668,9 +668,6 @@ extern "C" {
|
||||
#define HAS_SPLITUVROW_LSX
|
||||
#define HAS_SETROW_LSX
|
||||
#define HAS_MIRRORSPLITUVROW_LSX
|
||||
#define HAS_SOBELXROW_LSX
|
||||
#define HAS_SOBELYROW_LSX
|
||||
#define HAS_HALFFLOATROW_LSX
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
|
||||
@ -709,6 +706,24 @@ extern "C" {
|
||||
#define HAS_ARGBSHADEROW_LASX
|
||||
#define HAS_ARGBGRAYROW_LASX
|
||||
#define HAS_ARGBSEPIAROW_LASX
|
||||
#define HAS_ARGB4444TOARGBROW_LASX
|
||||
#define HAS_ARGB1555TOARGBROW_LASX
|
||||
#define HAS_RGB565TOARGBROW_LASX
|
||||
#define HAS_RGB24TOARGBROW_LASX
|
||||
#define HAS_RAWTOARGBROW_LASX
|
||||
#define HAS_ARGB1555TOYROW_LASX
|
||||
#define HAS_ARGB1555TOUVROW_LASX
|
||||
#define HAS_RGB565TOYROW_LASX
|
||||
#define HAS_RGB565TOUVROW_LASX
|
||||
#define HAS_RGB24TOYROW_LASX
|
||||
#define HAS_RGB24TOUVROW_LASX
|
||||
#define HAS_RAWTOYROW_LASX
|
||||
#define HAS_RAWTOUVROW_LASX
|
||||
#define HAS_NV12TOARGBROW_LASX
|
||||
#define HAS_NV12TORGB565ROW_LASX
|
||||
#define HAS_NV21TOARGBROW_LASX
|
||||
#define HAS_ARGBTOYJROW_LASX
|
||||
#define HAS_ARGBTOUVJROW_LASX
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
|
||||
@ -1090,16 +1105,31 @@ void NV12ToARGBRow_LSX(const uint8_t* src_y,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_LASX(const uint8_t* src_y,
|
||||
const uint8_t* src_uv,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_LSX(const uint8_t* src_y,
|
||||
const uint8_t* src_uv,
|
||||
uint8_t* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_LASX(const uint8_t* src_y,
|
||||
const uint8_t* src_uv,
|
||||
uint8_t* dst_rgb565,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_LSX(const uint8_t* src_y,
|
||||
const uint8_t* src_vu,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_LASX(const uint8_t* src_y,
|
||||
const uint8_t* src_vu,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void YUY2ToARGBRow_LSX(const uint8_t* src_yuy2,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
@ -1136,6 +1166,7 @@ void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
|
||||
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
@ -1278,26 +1309,51 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB565ToUVRow_LSX(const uint8_t* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB565ToUVRow_LASX(const uint8_t* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB24ToUVRow_LSX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB24ToUVRow_LASX(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RAWToUVRow_LSX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RAWToUVRow_LASX(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
@ -1324,9 +1380,13 @@ void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||
void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||
void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||
void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
||||
void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
||||
void RGB565ToYRow_LASX(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||
|
||||
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
|
||||
@ -1390,11 +1450,19 @@ void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
@ -1618,26 +1686,51 @@ void ARGBToUVJRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVJRow_Any_LASX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGB1555ToUVRow_Any_LSX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGB1555ToUVRow_Any_LASX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB565ToUVRow_Any_LSX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB565ToUVRow_Any_LASX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB24ToUVRow_Any_LSX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RGB24ToUVRow_Any_LASX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RAWToUVRow_Any_LSX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void RAWToUVRow_Any_LASX(const uint8_t* src_ptr,
|
||||
int src_stride_ptr,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int width);
|
||||
void ARGBToUVRow_C(const uint8_t* src_rgb,
|
||||
int src_stride_rgb,
|
||||
uint8_t* dst_u,
|
||||
@ -2655,10 +2748,12 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
|
||||
int width);
|
||||
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
|
||||
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||
void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||
@ -2671,6 +2766,9 @@ void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
|
||||
void RGB565ToARGBRow_LSX(const uint8_t* src_rgb565,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void RGB565ToARGBRow_LASX(const uint8_t* src_rgb565,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
@ -2680,6 +2778,9 @@ void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
|
||||
void ARGB1555ToARGBRow_LSX(const uint8_t* src_argb1555,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void ARGB1555ToARGBRow_LASX(const uint8_t* src_argb1555,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
@ -2689,6 +2790,9 @@ void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
|
||||
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_LASX(const uint8_t* src_argb4444,
|
||||
uint8_t* dst_argb,
|
||||
int width);
|
||||
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width);
|
||||
@ -2746,10 +2850,14 @@ void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||
void RGB24ToARGBRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void RGB24ToARGBRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToRGBARow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
|
||||
void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
@ -2764,6 +2872,9 @@ void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||
void RGB565ToARGBRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void RGB565ToARGBRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
@ -2776,6 +2887,9 @@ void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
|
||||
void ARGB1555ToARGBRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void ARGB1555ToARGBRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
@ -2783,6 +2897,9 @@ void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
|
||||
void ARGB4444ToARGBRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
void ARGB4444ToARGBRow_Any_LASX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
int width);
|
||||
|
||||
void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
|
||||
void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
|
||||
@ -4419,16 +4536,31 @@ void NV12ToARGBRow_Any_LSX(const uint8_t* y_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToARGBRow_Any_LASX(const uint8_t* y_buf,
|
||||
const uint8_t* uv_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_Any_LSX(const uint8_t* y_buf,
|
||||
const uint8_t* uv_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV12ToRGB565Row_Any_LASX(const uint8_t* y_buf,
|
||||
const uint8_t* uv_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_Any_LSX(const uint8_t* y_buf,
|
||||
const uint8_t* uv_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void NV21ToARGBRow_Any_LASX(const uint8_t* y_buf,
|
||||
const uint8_t* uv_buf,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width);
|
||||
void YUY2ToARGBRow_Any_LSX(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
|
||||
@ -1883,6 +1883,16 @@ int RGB24ToI420(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOYROW_LASX) && defined(HAS_RGB24TOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToUVRow = RGB24ToUVRow_Any_LASX;
|
||||
RGB24ToYRow = RGB24ToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToYRow = RGB24ToYRow_LASX;
|
||||
RGB24ToUVRow = RGB24ToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RGB24 to ARGB.
|
||||
#else // HAS_RGB24TOYROW
|
||||
@ -2205,6 +2215,16 @@ int RAWToI420(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOYROW_LASX) && defined(HAS_RAWTOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToUVRow = RAWToUVRow_Any_LASX;
|
||||
RAWToYRow = RAWToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToYRow = RAWToYRow_LASX;
|
||||
RAWToUVRow = RAWToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Other platforms do intermediate conversion from RAW to ARGB.
|
||||
#else // HAS_RAWTOYROW
|
||||
@ -2463,7 +2483,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
int height) {
|
||||
int y;
|
||||
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
|
||||
defined(HAS_RGB565TOYROW_LSX))
|
||||
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
|
||||
void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||
RGB565ToUVRow_C;
|
||||
@ -2501,7 +2521,8 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
// MSA version does direct RGB565 to YUV.
|
||||
#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX))
|
||||
#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) \
|
||||
|| defined(HAS_RGB565TOYROW_LASX))
|
||||
#if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
RGB565ToUVRow = RGB565ToUVRow_Any_MSA;
|
||||
@ -2522,6 +2543,16 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB565TOYROW_LASX) && defined(HAS_RGB565TOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB565ToUVRow = RGB565ToUVRow_Any_LASX;
|
||||
RGB565ToYRow = RGB565ToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB565ToYRow = RGB565ToYRow_LASX;
|
||||
RGB565ToUVRow = RGB565ToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Other platforms do intermediate conversion from RGB565 to ARGB.
|
||||
#else
|
||||
#if defined(HAS_RGB565TOARGBROW_SSE2)
|
||||
@ -2575,14 +2606,14 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
#endif
|
||||
{
|
||||
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
|
||||
defined(HAS_RGB565TOYROW_LSX))
|
||||
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
#endif
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
|
||||
defined(HAS_RGB565TOYROW_LSX))
|
||||
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
|
||||
RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width);
|
||||
@ -2600,7 +2631,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
}
|
||||
if (height & 1) {
|
||||
#if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
|
||||
defined(HAS_RGB565TOYROW_LSX))
|
||||
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
|
||||
RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width);
|
||||
RGB565ToYRow(src_rgb565, dst_y, width);
|
||||
#else
|
||||
@ -2610,7 +2641,7 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
#endif
|
||||
}
|
||||
#if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \
|
||||
defined(HAS_RGB565TOYROW_LSX))
|
||||
defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX))
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
@ -2631,7 +2662,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
int height) {
|
||||
int y;
|
||||
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
|
||||
defined(HAS_ARGB1555TOYROW_LSX))
|
||||
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
|
||||
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
|
||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||
ARGB1555ToUVRow_C;
|
||||
@ -2670,7 +2701,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
}
|
||||
}
|
||||
// MSA version does direct ARGB1555 to YUV.
|
||||
#elif (defined(HAS_ARGB1555TOYROW_MSA))
|
||||
#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) \
|
||||
|| defined(HAS_ARGB1555TOYROW_LASX))
|
||||
#if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA;
|
||||
@ -2681,7 +2713,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#elif (defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX))
|
||||
#if defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX;
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX;
|
||||
@ -2690,6 +2722,17 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB1555TOYROW_LASX) && defined(HAS_ARGB1555TOUVROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LASX;
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGB1555ToYRow = ARGB1555ToYRow_LASX;
|
||||
ARGB1555ToUVRow = ARGB1555ToUVRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Other platforms do intermediate conversion from ARGB1555 to ARGB.
|
||||
#else
|
||||
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
|
||||
@ -2743,7 +2786,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
#endif
|
||||
{
|
||||
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
|
||||
defined(HAS_ARGB1555TOYROW_LSX))
|
||||
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
@ -2751,7 +2794,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
|
||||
defined(HAS_ARGB1555TOYROW_LSX))
|
||||
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
|
||||
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y,
|
||||
@ -2771,7 +2814,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
}
|
||||
if (height & 1) {
|
||||
#if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
|
||||
defined(HAS_ARGB1555TOYROW_LSX))
|
||||
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
|
||||
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
|
||||
ARGB1555ToYRow(src_argb1555, dst_y, width);
|
||||
#else
|
||||
@ -2781,7 +2824,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
#endif
|
||||
}
|
||||
#if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \
|
||||
defined(HAS_ARGB1555TOYROW_LSX))
|
||||
defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX))
|
||||
free_aligned_buffer_64(row);
|
||||
#endif
|
||||
}
|
||||
@ -2873,6 +2916,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB4444TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
|
||||
@ -2886,6 +2886,14 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB24TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToARGBRow(src_rgb24, dst_argb, width);
|
||||
@ -2953,6 +2961,14 @@ int RAWToARGB(const uint8_t* src_raw,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RAWTOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RAWToARGBRow = RAWToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToARGBRow(src_raw, dst_argb, width);
|
||||
@ -3079,6 +3095,14 @@ int RGB565ToARGB(const uint8_t* src_rgb565,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_RGB565TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB565ToARGBRow(src_rgb565, dst_argb, width);
|
||||
@ -3154,6 +3178,14 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB1555TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
|
||||
@ -3229,6 +3261,14 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGB4444TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
|
||||
@ -3516,6 +3556,14 @@ int NV12ToARGBMatrix(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
|
||||
@ -3592,6 +3640,14 @@ int NV21ToARGBMatrix(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TOARGBROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
|
||||
@ -4340,6 +4396,14 @@ int NV12ToRGB565Matrix(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV12TORGB565ROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_Any_LASX;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
NV12ToRGB565Row = NV12ToRGB565Row_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, yuvconstants, width);
|
||||
|
||||
@ -1941,6 +1941,16 @@ int ARGBToJ420(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LASX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYJRow = ARGBToYJRow_LASX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
|
||||
@ -2054,6 +2064,16 @@ int ARGBToJ422(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LASX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYJRow = ARGBToYJRow_LASX;
|
||||
ARGBToUVJRow = ARGBToUVJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
|
||||
|
||||
@ -196,7 +196,7 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name) {
|
||||
#define LOONGARCH_CFG2_LSX (1 << 6)
|
||||
#define LOONGARCH_CFG2_LASX (1 << 7)
|
||||
|
||||
#if defined(__loongarch__) && defined(__linux__)
|
||||
#if defined(__loongarch__)
|
||||
LIBYUV_API SAFEBUFFERS int LoongarchCpuCaps(void) {
|
||||
int flag = 0x0;
|
||||
uint32_t cfg2 = 0;
|
||||
|
||||
@ -4207,6 +4207,14 @@ static int ARGBSobelize(const uint8_t* src_argb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBTOYJROW_LASX)
|
||||
if (TestCpuFlag(kCpuHasLASX)) {
|
||||
ARGBToYJRow = ARGBToYJRow_Any_LASX;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
ARGBToYJRow = ARGBToYJRow_LASX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_SOBELYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
|
||||
@ -698,6 +698,9 @@ ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
|
||||
#ifdef HAS_NV12TOARGBROW_LSX
|
||||
ANY21C(NV12ToARGBRow_Any_LSX, NV12ToARGBRow_LSX, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TOARGBROW_LASX
|
||||
ANY21C(NV12ToARGBRow_Any_LASX, NV12ToARGBRow_LASX, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_SSSE3
|
||||
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
@ -713,6 +716,9 @@ ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
|
||||
#ifdef HAS_NV21TOARGBROW_LSX
|
||||
ANY21C(NV21ToARGBRow_Any_LSX, NV21ToARGBRow_LSX, 1, 1, 2, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV21TOARGBROW_LASX
|
||||
ANY21C(NV21ToARGBRow_Any_LASX, NV21ToARGBRow_LASX, 1, 1, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB24ROW_NEON
|
||||
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
|
||||
#endif
|
||||
@ -746,6 +752,9 @@ ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
|
||||
#ifdef HAS_NV12TORGB565ROW_LSX
|
||||
ANY21C(NV12ToRGB565Row_Any_LSX, NV12ToRGB565Row_LSX, 1, 1, 2, 2, 7)
|
||||
#endif
|
||||
#ifdef HAS_NV12TORGB565ROW_LASX
|
||||
ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
|
||||
#endif
|
||||
#undef ANY21C
|
||||
|
||||
// Any 2 planes of 16 bit to 1 with yuvconstants
|
||||
@ -998,6 +1007,9 @@ ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
|
||||
#ifdef HAS_ARGBTOYJROW_LSX
|
||||
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOYJROW_LASX
|
||||
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOYROW_NEON
|
||||
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7)
|
||||
#endif
|
||||
@ -1043,6 +1055,9 @@ ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
|
||||
#ifdef HAS_RGB24TOYROW_LSX
|
||||
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOYROW_LASX
|
||||
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYROW_NEON
|
||||
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
|
||||
#endif
|
||||
@ -1061,6 +1076,9 @@ ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
|
||||
#ifdef HAS_RAWTOYROW_LSX
|
||||
ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOYROW_LASX
|
||||
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOYROW_NEON
|
||||
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
|
||||
#endif
|
||||
@ -1070,6 +1088,9 @@ ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
|
||||
#ifdef HAS_RGB565TOYROW_LSX
|
||||
ANY11(RGB565ToYRow_Any_LSX, RGB565ToYRow_LSX, 0, 2, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOYROW_LASX
|
||||
ANY11(RGB565ToYRow_Any_LASX, RGB565ToYRow_LASX, 0, 2, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOYROW_NEON
|
||||
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
|
||||
#endif
|
||||
@ -1079,6 +1100,9 @@ ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
|
||||
#ifdef HAS_ARGB1555TOYROW_LSX
|
||||
ANY11(ARGB1555ToYRow_Any_LSX, ARGB1555ToYRow_LSX, 0, 2, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOYROW_LASX
|
||||
ANY11(ARGB1555ToYRow_Any_LASX, ARGB1555ToYRow_LASX, 0, 2, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOYROW_NEON
|
||||
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
|
||||
#endif
|
||||
@ -1121,6 +1145,9 @@ ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
|
||||
#ifdef HAS_RGB24TOARGBROW_LSX
|
||||
ANY11(RGB24ToARGBRow_Any_LSX, RGB24ToARGBRow_LSX, 0, 3, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOARGBROW_LASX
|
||||
ANY11(RGB24ToARGBRow_Any_LASX, RGB24ToARGBRow_LASX, 0, 3, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOARGBROW_NEON
|
||||
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
|
||||
#endif
|
||||
@ -1133,6 +1160,9 @@ ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
|
||||
#ifdef HAS_RAWTOARGBROW_LSX
|
||||
ANY11(RAWToARGBRow_Any_LSX, RAWToARGBRow_LSX, 0, 3, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOARGBROW_LASX
|
||||
ANY11(RAWToARGBRow_Any_LASX, RAWToARGBRow_LASX, 0, 3, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOARGBROW_NEON
|
||||
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#endif
|
||||
@ -1142,6 +1172,9 @@ ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
|
||||
#ifdef HAS_RGB565TOARGBROW_LSX
|
||||
ANY11(RGB565ToARGBRow_Any_LSX, RGB565ToARGBRow_LSX, 0, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOARGBROW_LASX
|
||||
ANY11(RGB565ToARGBRow_Any_LASX, RGB565ToARGBRow_LASX, 0, 2, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOARGBROW_NEON
|
||||
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#endif
|
||||
@ -1151,6 +1184,9 @@ ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
|
||||
#ifdef HAS_ARGB1555TOARGBROW_LSX
|
||||
ANY11(ARGB1555ToARGBRow_Any_LSX, ARGB1555ToARGBRow_LSX, 0, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOARGBROW_LASX
|
||||
ANY11(ARGB1555ToARGBRow_Any_LASX, ARGB1555ToARGBRow_LASX, 0, 2, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOARGBROW_NEON
|
||||
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
|
||||
#endif
|
||||
@ -1160,6 +1196,9 @@ ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
|
||||
#ifdef HAS_ARGB4444TOARGBROW_LSX
|
||||
ANY11(ARGB4444ToARGBRow_Any_LSX, ARGB4444ToARGBRow_LSX, 0, 2, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOARGBROW_LASX
|
||||
ANY11(ARGB4444ToARGBRow_Any_LASX, ARGB4444ToARGBRow_LASX, 0, 2, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGBATTENUATEROW_SSSE3
|
||||
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
|
||||
#endif
|
||||
@ -1936,6 +1975,9 @@ ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
|
||||
#ifdef HAS_ARGBTOUVJROW_LSX
|
||||
ANY12S(ARGBToUVJRow_Any_LSX, ARGBToUVJRow_LSX, 0, 4, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGBTOUVJROW_LASX
|
||||
ANY12S(ARGBToUVJRow_Any_LASX, ARGBToUVJRow_LASX, 0, 4, 31)
|
||||
#endif
|
||||
#ifdef HAS_BGRATOUVROW_NEON
|
||||
ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15)
|
||||
#endif
|
||||
@ -1975,6 +2017,9 @@ ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
|
||||
#ifdef HAS_RGB24TOUVROW_LSX
|
||||
ANY12S(RGB24ToUVRow_Any_LSX, RGB24ToUVRow_LSX, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB24TOUVROW_LASX
|
||||
ANY12S(RGB24ToUVRow_Any_LASX, RGB24ToUVRow_LASX, 0, 3, 31)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOUVROW_NEON
|
||||
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
|
||||
#endif
|
||||
@ -1987,6 +2032,9 @@ ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
|
||||
#ifdef HAS_RAWTOUVROW_LSX
|
||||
ANY12S(RAWToUVRow_Any_LSX, RAWToUVRow_LSX, 0, 3, 15)
|
||||
#endif
|
||||
#ifdef HAS_RAWTOUVROW_LASX
|
||||
ANY12S(RAWToUVRow_Any_LASX, RAWToUVRow_LASX, 0, 3, 31)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOUVROW_NEON
|
||||
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
|
||||
#endif
|
||||
@ -1996,6 +2044,9 @@ ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
|
||||
#ifdef HAS_RGB565TOUVROW_LSX
|
||||
ANY12S(RGB565ToUVRow_Any_LSX, RGB565ToUVRow_LSX, 0, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_RGB565TOUVROW_LASX
|
||||
ANY12S(RGB565ToUVRow_Any_LASX, RGB565ToUVRow_LASX, 0, 2, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOUVROW_NEON
|
||||
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
|
||||
#endif
|
||||
@ -2005,6 +2056,9 @@ ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
|
||||
#ifdef HAS_ARGB1555TOUVROW_LSX
|
||||
ANY12S(ARGB1555ToUVRow_Any_LSX, ARGB1555ToUVRow_LSX, 0, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_ARGB1555TOUVROW_LASX
|
||||
ANY12S(ARGB1555ToUVRow_Any_LASX, ARGB1555ToUVRow_LASX, 0, 2, 31)
|
||||
#endif
|
||||
#ifdef HAS_ARGB4444TOUVROW_NEON
|
||||
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -152,7 +152,7 @@ extern "C" {
|
||||
_reg1 = __lsx_vmsub_h(_reg1, const_94, _tmpg); \
|
||||
_reg0 = __lsx_vmsub_h(_reg0, const_38, _tmpr); \
|
||||
_reg1 = __lsx_vmsub_h(_reg1, const_18, _tmpb); \
|
||||
_dst0 = __lsx_vsrlni_b_h(_reg1, _reg0, 8); \
|
||||
_dst0 = __lsx_vpickod_b(_reg1, _reg0); \
|
||||
}
|
||||
|
||||
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
|
||||
@ -355,7 +355,6 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
|
||||
__m128i const_129 = __lsx_vldi(129);
|
||||
__m128i const_25 = __lsx_vldi(25);
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
__m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
src0 = __lsx_vld(src_argb1555, 0);
|
||||
@ -384,8 +383,7 @@ void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555,
|
||||
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
|
||||
reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
|
||||
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
|
||||
dst0 = __lsx_vpackod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_argb1555 += 32;
|
||||
@ -468,7 +466,6 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
||||
__m128i const_129 = __lsx_vldi(129);
|
||||
__m128i const_25 = __lsx_vldi(25);
|
||||
__m128i const_1080 = {0x1080108010801080, 0x1080108010801080};
|
||||
__m128i shuff = {0x0B030A0209010800, 0x0F070E060D050C04};
|
||||
|
||||
for (x = 0; x < len; x++) {
|
||||
src0 = __lsx_vld(src_rgb565, 0);
|
||||
@ -495,8 +492,7 @@ void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
||||
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpg, const_129);
|
||||
reg0 = __lsx_vmaddwev_h_bu(reg0, tmpr, const_66);
|
||||
reg1 = __lsx_vmaddwod_h_bu(reg1, tmpr, const_66);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
dst0 = __lsx_vshuf_b(dst0, dst0, shuff);
|
||||
dst0 = __lsx_vpackod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_rgb565 += 32;
|
||||
@ -591,7 +587,7 @@ void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_1080, tmp3, const_129);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp1);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
dst0 = __lsx_vpickod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_rgb24 += 48;
|
||||
@ -939,7 +935,7 @@ void ARGBToYJRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
reg1 = __lsx_vmaddwev_h_bu(const_128, tmp3, const_150);
|
||||
reg0 = __lsx_vdp2add_h_bu(reg0, const_br, tmp0);
|
||||
reg1 = __lsx_vdp2add_h_bu(reg1, const_br, tmp2);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
dst0 = __lsx_vpickod_b(reg1, reg0);
|
||||
__lsx_vst(dst0, dst_y, 0);
|
||||
dst_y += 16;
|
||||
src_argb += 64;
|
||||
@ -1228,7 +1224,7 @@ void ARGBToUVJRow_LSX(const uint8_t* src_argb,
|
||||
reg1 = __lsx_vmsub_h(reg1, const_53, tmpg);
|
||||
reg0 = __lsx_vmsub_h(reg0, const_21, tmpr);
|
||||
reg1 = __lsx_vmsub_h(reg1, const_10, tmpb);
|
||||
dst0 = __lsx_vsrlni_b_h(reg1, reg0, 8);
|
||||
dst0 = __lsx_vpickod_b(reg1, reg0);
|
||||
__lsx_vstelm_d(dst0, dst_u, 0, 0);
|
||||
__lsx_vstelm_d(dst0, dst_v, 0, 1);
|
||||
dst_u += 8;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user