mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
BUG=none TEST=none Review URL: https://webrtc-codereview.appspot.com/920005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@434 16f28f9a-4ce2-e073-06de-1de4eb20be90
244 lines
12 KiB
C++
244 lines
12 KiB
C++
/*
|
|
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "libyuv/basic_types.h"
|
|
#include "libyuv/row.h"
|
|
|
|
#ifdef __cplusplus
|
|
namespace libyuv {
|
|
extern "C" {
|
|
#endif
|
|
|
|
// This module is for GCC MIPS DSPR2
|
|
#if !defined(YUV_DISABLE_ASM) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
|
|
|
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
|
|
uint8* dst, int dst_width) {
|
|
__asm__ __volatile__(
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
|
|
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
|
|
"beqz $t9, 2f \n"
|
|
" nop \n"
|
|
|
|
"1: \n"
|
|
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
|
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
|
|
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
|
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
|
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
|
|
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
|
"addiu $t9, $t9, -1 \n"
|
|
"sw $t8, 0(%[dst]) \n"
|
|
"sw $t0, 4(%[dst]) \n"
|
|
"sw $t1, 8(%[dst]) \n"
|
|
"sw $t2, 12(%[dst]) \n"
|
|
"bgtz $t9, 1b \n"
|
|
" addiu %[dst], %[dst], 16 \n"
|
|
|
|
"2: \n"
|
|
"andi $t9, %[dst_width], 0xf \n" // residue
|
|
"beqz $t9, 3f \n"
|
|
" nop \n"
|
|
|
|
"21: \n"
|
|
"lbu $t0, 0(%[src_ptr]) \n"
|
|
"addiu %[src_ptr], %[src_ptr], 2 \n"
|
|
"addiu $t9, $t9, -1 \n"
|
|
"sb $t0, 0(%[dst]) \n"
|
|
"bgtz $t9, 21b \n"
|
|
" addiu %[dst], %[dst], 1 \n"
|
|
|
|
"3: \n"
|
|
".set pop \n"
|
|
: [src_ptr] "+r" (src_ptr),
|
|
[dst] "+r" (dst)
|
|
: [dst_width] "r" (dst_width)
|
|
: "t0", "t1", "t2", "t3", "t4", "t5",
|
|
"t6", "t7", "t8", "t9"
|
|
);
|
|
}
|
|
|
|
void ScaleRowDown2Int_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
|
uint8* dst, int dst_width) {
|
|
|
|
const uint8* t = src_ptr + src_stride;
|
|
|
|
__asm__ __volatile__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
|
|
"srl $t9, %[dst_width], 3 \n" // iterations -> step 8
|
|
"bltz $t9, 2f \n"
|
|
" nop \n"
|
|
|
|
"1: \n"
|
|
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
|
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
|
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
|
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
|
"lw $t4, 0(%[t]) \n" // |19|18|17|16|
|
|
"lw $t5, 4(%[t]) \n" // |23|22|21|20|
|
|
"lw $t6, 8(%[t]) \n" // |27|26|25|24|
|
|
"lw $t7, 12(%[t]) \n" // |31|30|29|28|
|
|
"addiu $t9, $t9, -1 \n"
|
|
"srl $t8, $t0, 16 \n" // |X|X|3|2|
|
|
"ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
|
|
"ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
|
|
"raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
|
|
"raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
|
|
"shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
|
|
"shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
|
|
"srl $t8, $t1, 16 \n" // |X|X|7|6|
|
|
"ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
|
|
"ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
|
|
"raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
|
|
"raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
|
|
"shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
|
|
"shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
|
|
"srl $t8, $t2, 16 \n" // |X|X|11|10|
|
|
"ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
|
|
"ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
|
|
"raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
|
|
"raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
|
|
"shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
|
|
"shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
|
|
"srl $t8, $t3, 16 \n" // |X|X|15|14|
|
|
"ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
|
|
"ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
|
|
"raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
|
|
"raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
|
|
"shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
|
|
"shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
|
|
"addiu %[src_ptr], %[src_ptr], 16 \n"
|
|
"addiu %[t], %[t], 16 \n"
|
|
"sb $t0, 0(%[dst]) \n"
|
|
"sb $t4, 1(%[dst]) \n"
|
|
"sb $t1, 2(%[dst]) \n"
|
|
"sb $t5, 3(%[dst]) \n"
|
|
"sb $t2, 4(%[dst]) \n"
|
|
"sb $t6, 5(%[dst]) \n"
|
|
"sb $t3, 6(%[dst]) \n"
|
|
"sb $t7, 7(%[dst]) \n"
|
|
"bgtz $t9, 1b \n"
|
|
" addiu %[dst], %[dst], 8 \n"
|
|
|
|
"2: \n"
|
|
"andi $t9, %[dst_width], 0x7 \n" // x = residue
|
|
"beqz $t9, 3f \n"
|
|
" nop \n"
|
|
|
|
"21: \n"
|
|
"lwr $t1, 0(%[src_ptr]) \n"
|
|
"lwl $t1, 3(%[src_ptr]) \n"
|
|
"lwr $t2, 0(%[t]) \n"
|
|
"lwl $t2, 3(%[t]) \n"
|
|
"srl $t8, $t1, 16 \n"
|
|
"ins $t1, $t2, 16, 16 \n"
|
|
"ins $t2, $t8, 0, 16 \n"
|
|
"raddu.w.qb $t1, $t1 \n"
|
|
"raddu.w.qb $t2, $t2 \n"
|
|
"shra_r.w $t1, $t1, 2 \n"
|
|
"shra_r.w $t2, $t2, 2 \n"
|
|
"sb $t1, 0(%[dst]) \n"
|
|
"sb $t2, 1(%[dst]) \n"
|
|
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
|
"addiu $t9, $t9, -2 \n"
|
|
"addiu %[t], %[t], 4 \n"
|
|
"bgtz $t9, 21b \n"
|
|
" addiu %[dst], %[dst], 2 \n"
|
|
|
|
"3: \n"
|
|
".set pop \n"
|
|
|
|
: [src_ptr] "+r" (src_ptr),
|
|
[dst] "+r" (dst), [t] "+r" (t)
|
|
: [dst_width] "r" (dst_width)
|
|
: "t0", "t1", "t2", "t3", "t4", "t5",
|
|
"t6", "t7", "t8", "t9"
|
|
);
|
|
}
|
|
|
|
// Vertically blends two adjacent source rows into one destination row:
//   dst_ptr[i] = (src_ptr[i] * (256 - source_y_fraction) +
//                 src_ptr[i + src_stride] * source_y_fraction) >> 8
//
//   dst_ptr           - destination row.
//   src_ptr           - first source row; the second is src_ptr + src_stride.
//   src_stride        - byte offset between the two source rows.
//   dst_width         - number of output pixels requested.
//   source_y_fraction - weight of the second row; the first row gets
//                       256 - source_y_fraction.
//
// The loop handles 8 pixels per iteration and tests dst_width only at the
// bottom, so at least 8 pixels are always processed and the width is
// effectively rounded up to a multiple of 8. After the loop, the last byte
// written is duplicated into the next destination byte, so the destination
// needs room for one byte beyond the pixels written.
// NOTE(review): lw/sw normally require 4-byte aligned addresses on MIPS -
// callers presumably guarantee aligned rows; confirm.
void ScaleFilterRows_MIPS_DSPR2(unsigned char *dst_ptr,
                                const unsigned char* src_ptr,
                                ptrdiff_t src_stride,
                                int dst_width, int source_y_fraction) {
  int y0_fraction = 256 - source_y_fraction;              // weight of row 0
  const unsigned char* src_ptr1 = src_ptr + src_stride;   // row 1

  __asm__ __volatile__ (
    ".set push                                           \n"
    ".set noreorder                                      \n"

    // Replicate each weight into both halfwords for the paired multiplies.
    "replv.ph       $t0, %[y0_fraction]                  \n"
    "replv.ph       $t1, %[source_y_fraction]            \n"

  "1:                                                    \n"
    "lw             $t2, 0(%[src_ptr])                   \n"  // row 0, px 0..3
    "lw             $t3, 0(%[src_ptr1])                  \n"  // row 1, px 0..3
    "lw             $t4, 4(%[src_ptr])                   \n"  // row 0, px 4..7
    "lw             $t5, 4(%[src_ptr1])                  \n"  // row 1, px 4..7
    // Byte * weight products as halfword pairs (qbl = upper two bytes,
    // qbr = lower two bytes of the source word).
    "muleu_s.ph.qbl $t6, $t2, $t0                        \n"
    "muleu_s.ph.qbr $t7, $t2, $t0                        \n"
    "muleu_s.ph.qbl $t8, $t3, $t1                        \n"
    "muleu_s.ph.qbr $t9, $t3, $t1                        \n"
    "muleu_s.ph.qbl $t2, $t4, $t0                        \n"
    "muleu_s.ph.qbr $t3, $t4, $t0                        \n"
    "muleu_s.ph.qbl $t4, $t5, $t1                        \n"
    "muleu_s.ph.qbr $t5, $t5, $t1                        \n"
    // Sum the two weighted rows.
    "addq.ph        $t6, $t6, $t8                        \n"
    "addq.ph        $t7, $t7, $t9                        \n"
    "addq.ph        $t2, $t2, $t4                        \n"
    "addq.ph        $t3, $t3, $t5                        \n"
    // Divide by 256; only the low byte of each halfword is kept below.
    "shra.ph        $t6, $t6, 8                          \n"
    "shra.ph        $t7, $t7, 8                          \n"
    "shra.ph        $t2, $t2, 8                          \n"
    "shra.ph        $t3, $t3, 8                          \n"
    // Pack four halfwords back into four bytes.
    "precr.qb.ph    $t6, $t6, $t7                        \n"
    "precr.qb.ph    $t2, $t2, $t3                        \n"
    "addiu          %[src_ptr], %[src_ptr], 8            \n"
    "addiu          %[src_ptr1], %[src_ptr1], 8          \n"
    "addiu          %[dst_width], %[dst_width], -8       \n"
    "sw             $t6, 0(%[dst_ptr])                   \n"
    "sw             $t2, 4(%[dst_ptr])                   \n"
    "bgtz           %[dst_width], 1b                     \n"
    " addiu         %[dst_ptr], %[dst_ptr], 8            \n"  // delay slot

    // Duplicate the last written byte into the byte just past the row.
    "lbu            $t0, -1(%[dst_ptr])                  \n"
    "sb             $t0, 0(%[dst_ptr])                   \n"
    ".set pop                                            \n"
  : [dst_ptr] "+r" (dst_ptr),
    [src_ptr1] "+r" (src_ptr1),
    [src_ptr] "+r" (src_ptr),
    [dst_width] "+r" (dst_width)
  : [source_y_fraction] "r" (source_y_fraction),
    [y0_fraction] "r" (y0_fraction)
  // "memory": the asm reads both source rows and reads/writes the
  // destination row, none of which is expressed through the operands.
  : "t0", "t1", "t2", "t3", "t4", "t5",
    "t6", "t7", "t8", "t9", "memory"
  );
}
|
|
|
|
#endif // if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
|
|
|
#ifdef __cplusplus
|
|
} // extern "C"
|
|
} // namespace libyuv
|
|
#endif
|
|
|