libyuv

mirror of https://chromium.googlesource.com/libyuv/libyuv synced 2026-01-01 03:12:16 +08:00

History

Frank Barchard 451af5e922 scale by 1 for neon implemented void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { asm volatile ( "1: \n" MEMACCESS(0) "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts "subs %w2, %w2, #8 \n" // 8 pixels per loop "uxtl v2.4s, v1.4h \n" // 8 int's "uxtl2 v1.4s, v1.8h \n" "scvtf v2.4s, v2.4s \n" // 8 floats "scvtf v1.4s, v1.4s \n" "fcvtn v4.4h, v2.4s \n" // 8 floats "fcvtn2 v4.8h, v1.4s \n" MEMACCESS(1) "st1 {v4.16b}, [%1], #16 \n" // store 8 shorts "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v4" ); } void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { asm volatile ( "1: \n" MEMACCESS(0) "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts "subs %w2, %w2, #8 \n" // 8 pixels per loop "uxtl v2.4s, v1.4h \n" // 8 int's "uxtl2 v1.4s, v1.8h \n" "scvtf v2.4s, v2.4s \n" // 8 floats "scvtf v1.4s, v1.4s \n" "fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent "fmul v1.4s, v1.4s, %3.s[0] \n" "uqshrn v4.4h, v2.4s, #13 \n" // isolate halffloat "uqshrn2 v4.8h, v1.4s, #13 \n" MEMACCESS(1) "st1 {v4.16b}, [%1], #16 \n" // store 8 shorts "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale * 1.9259299444e-34f) // %3 : "cc", "memory", "v1", "v2", "v4" ); } TEST=LibYUVPlanarTest.TestHalfFloatPlane_One BUG=libyuv:560 R=hubbe@chromium.org Review URL: https://codereview.chromium.org/2430313008 .		2016-10-21 14:30:03 -07:00
..
compare_common.cc	xmmword cast for clang	2015-08-18 11:13:12 -07:00
compare_gcc.cc	nolint removed	2015-08-31 10:52:13 -07:00
compare_neon64.cc	xmmword cast for clang	2015-08-18 11:13:12 -07:00
compare_neon.cc	xmmword cast for clang	2015-08-18 11:13:12 -07:00
compare_win.cc	xmmword cast for clang	2015-08-18 11:13:12 -07:00
compare.cc	xmmword cast for clang	2015-08-18 11:13:12 -07:00
convert_argb.cc	Add MSA optimized ARGB4444ToI420 and ARGB4444ToARGB functions	2016-10-19 11:10:51 -07:00
convert_from_argb.cc	Add MSA optimized ARGB4444ToI420 and ARGB4444ToARGB functions	2016-10-19 11:10:51 -07:00
convert_from.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
convert_jpeg.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
convert_to_argb.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
convert_to_i420.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
convert.cc	Add MSA optimized ARGB4444ToI420 and ARGB4444ToARGB functions	2016-10-19 11:10:51 -07:00
cpu_id.cc	cpu_id cleanup. no functional change.	2016-10-18 12:26:02 -07:00
mjpeg_decoder.cc	Suppress MJPEG fprintf() runtime warning	2016-08-22 16:30:36 -07:00
mjpeg_validate.cc	validate scan EOI from end for better coverage	2015-09-14 10:58:51 -07:00
planar_functions.cc	scale by 1 for neon implemented	2016-10-21 14:30:03 -07:00
rotate_any.cc	rename MIPS_DSPR2 to DSPR2 for consistency	2016-02-05 14:49:54 -08:00
rotate_argb.cc	Add MIPS SIMD Arch (MSA) optimized ARGBMirrorRow function	2016-09-26 16:28:01 -07:00
rotate_common.cc	rotate include and proto cleanup	2015-07-22 18:09:04 -07:00
rotate_gcc.cc	use visual c 32 bit code for clangcl	2015-08-11 10:10:45 -07:00
rotate_mips.cc	white space fixes for MIPS	2016-05-24 14:17:18 -07:00
rotate_neon64.cc	Remove initialize to zero on output variables for inline.	2016-04-18 16:24:26 -07:00
rotate_neon.cc	Remove initialize to zero on output variables for inline.	2016-04-18 16:24:26 -07:00
rotate_win.cc	use visual c 32 bit code for clangcl	2015-08-11 10:10:45 -07:00
rotate.cc	Add MIPS SIMD Arch (MSA) optimized MirrorRow function	2016-09-22 16:12:22 -07:00
row_any.cc	scale by 1 for neon implemented	2016-10-21 14:30:03 -07:00
row_common.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
row_gcc.cc	scale by 1 for neon implemented	2016-10-21 14:30:03 -07:00
row_mips.cc	white space fixes for MIPS	2016-05-24 14:17:18 -07:00
row_msa.cc	Add MSA optimized ARGB4444ToI420 and ARGB4444ToARGB functions	2016-10-19 11:10:51 -07:00
row_neon64.cc	scale by 1 for neon implemented	2016-10-21 14:30:03 -07:00
row_neon.cc	Remove I411 support.	2016-10-11 11:14:16 -07:00
row_win.cc	Port HalfFloatRow_SSE2 to AVX2 but not using F16C.	2016-10-14 19:01:41 -07:00
scale_any.cc	Odd width variation of scale down by 2 for subsampling	2016-01-06 15:12:17 -08:00
scale_argb.cc	rename MIPS_DSPR2 to DSPR2 for consistency	2016-02-05 14:49:54 -08:00
scale_common.cc	Fix some comment typos	2016-09-15 15:38:19 -07:00
scale_gcc.cc	Fix some comment typos	2016-09-15 15:38:19 -07:00
scale_mips.cc	rename MIPS_DSPR2 to DSPR2 for consistency	2016-02-05 14:49:54 -08:00
scale_neon64.cc	fix multi-line comment warning	2016-09-16 15:16:39 -07:00
scale_neon.cc	fix multi-line comment warning	2016-09-16 15:16:39 -07:00
scale_win.cc	YUV scale filter columns improved filtering accuracy	2016-06-23 20:16:55 -07:00
scale.cc	Scale by 3/8 only if source is multiple of 8 tall.	2016-09-16 14:57:47 -07:00
video_common.cc	Treat YU12 as an alias for I420. Simplify setting of inv_crop_height.	2016-06-16 12:49:17 +02:00