mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
[AArch64] Fix SVE/SME vector length printing in cpuid
A semicolon is treated as the start of a comment by some assemblers, causing the vector length to be reported incorrectly, so use a newline instead.
- Add volatile asm in row_gcc and row_neon64
Bug: b/5631539
Change-Id: I6b0836fcdd9247ef7b9e8ceda01df3150519ecf8
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5666060
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
parent
d32436e8f8
commit
611806a155
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||||
Version: 1890
|
Version: 1891
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
Shipped: yes
|
Shipped: yes
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1890
|
#define LIBYUV_VERSION 1891
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -2626,7 +2626,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2686,7 +2686,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_rgb24,
|
uint8_t* dst_rgb24,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
||||||
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
||||||
@ -2722,7 +2722,7 @@ void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_rgb24,
|
uint8_t* dst_rgb24,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
||||||
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
||||||
@ -2758,7 +2758,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2787,7 +2787,7 @@ void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
|
"pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
|
||||||
@ -2822,7 +2822,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2852,7 +2852,7 @@ void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2882,7 +2882,7 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2917,7 +2917,7 @@ void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -2952,7 +2952,7 @@ void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -3045,7 +3045,7 @@ void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -3238,7 +3238,7 @@ void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrlw $14,%%xmm5 \n"
|
"psrlw $14,%%xmm5 \n"
|
||||||
@ -3269,7 +3269,7 @@ void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
"psrlw $14,%%xmm5 \n"
|
"psrlw $14,%%xmm5 \n"
|
||||||
@ -3301,7 +3301,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_rgba,
|
uint8_t* dst_rgba,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP(yuvconstants)
|
YUVTORGB_SETUP(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
@ -3712,7 +3712,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -3746,7 +3746,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -3786,7 +3786,7 @@ void OMITFP I422ToARGBRow_AVX512BW(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX512BW(yuvconstants)
|
YUVTORGB_SETUP_AVX512BW(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n"
|
"vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n"
|
||||||
@ -3825,7 +3825,7 @@ void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
@ -3865,7 +3865,7 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -3900,7 +3900,7 @@ void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -3935,7 +3935,7 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
@ -3975,7 +3975,7 @@ void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
@ -4015,7 +4015,7 @@ void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -4120,7 +4120,7 @@ void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
@ -4228,7 +4228,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"sub %[u_buf],%[v_buf] \n"
|
"sub %[u_buf],%[v_buf] \n"
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
@ -4430,7 +4430,7 @@ void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
||||||
@ -4467,7 +4467,7 @@ void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_ar30,
|
uint8_t* dst_ar30,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm (
|
asm volatile (
|
||||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||||
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
||||||
@ -5681,9 +5681,7 @@ void MergeXRGBRow_AVX2(const uint8_t* src_r,
|
|||||||
const uint8_t* src_b,
|
const uint8_t* src_b,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm(
|
asm volatile (
|
||||||
|
|
||||||
LABELALIGN
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
|
||||||
"vmovdqu (%2),%%xmm0 \n" // B
|
"vmovdqu (%2),%%xmm0 \n" // B
|
||||||
@ -7381,7 +7379,7 @@ void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
uintptr_t alpha;
|
uintptr_t alpha;
|
||||||
asm(
|
asm volatile (
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -7841,7 +7839,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm(
|
asm volatile (
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -7869,7 +7867,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm(
|
asm volatile (
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -7897,7 +7895,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm(
|
asm volatile (
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -7925,7 +7923,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm(
|
asm volatile (
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -9099,7 +9097,7 @@ void ARGBColorTableRow_X86(uint8_t* dst_argb,
|
|||||||
const uint8_t* table_argb,
|
const uint8_t* table_argb,
|
||||||
int width) {
|
int width) {
|
||||||
uintptr_t pixel_temp;
|
uintptr_t pixel_temp;
|
||||||
asm(
|
asm volatile (
|
||||||
// 1 pixel loop.
|
// 1 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -9132,7 +9130,7 @@ void RGBColorTableRow_X86(uint8_t* dst_argb,
|
|||||||
const uint8_t* table_argb,
|
const uint8_t* table_argb,
|
||||||
int width) {
|
int width) {
|
||||||
uintptr_t pixel_temp;
|
uintptr_t pixel_temp;
|
||||||
asm(
|
asm volatile (
|
||||||
// 1 pixel loop.
|
// 1 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
|||||||
@ -2805,7 +2805,8 @@ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int width,
|
int width,
|
||||||
const struct RgbConstants* rgbconstants) {
|
const struct RgbConstants* rgbconstants) {
|
||||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
asm volatile (
|
||||||
|
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||||
@ -2863,7 +2864,8 @@ static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
|
|||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int width,
|
int width,
|
||||||
const struct RgbConstants* rgbconstants) {
|
const struct RgbConstants* rgbconstants) {
|
||||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
asm volatile (
|
||||||
|
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||||
@ -2920,7 +2922,8 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
|
|||||||
7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
|
7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
|
||||||
0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
|
0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
|
||||||
31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
|
31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
|
||||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
asm volatile (
|
||||||
|
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||||
|
|||||||
@ -331,7 +331,8 @@ void I212ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
const uint16_t limit = 0x3ff0;
|
const uint16_t limit = 0x3ff0;
|
||||||
asm(YUVTORGB_SETUP
|
asm volatile (
|
||||||
|
YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"1: \n" READYUV212 NVTORGB
|
"1: \n" READYUV212 NVTORGB
|
||||||
@ -400,7 +401,8 @@ void I212ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
int width) {
|
int width) {
|
||||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
asm(YUVTORGB_SETUP
|
asm volatile (
|
||||||
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV212 NVTORGB RGBTORGB8
|
"1: \n" READYUV212 NVTORGB RGBTORGB8
|
||||||
"subs %w[width], %w[width], #8 \n"
|
"subs %w[width], %w[width], #8 \n"
|
||||||
@ -449,7 +451,8 @@ void I422ToAR30Row_NEON(const uint8_t* src_y,
|
|||||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
const uint16_t limit = 0x3ff0;
|
const uint16_t limit = 0x3ff0;
|
||||||
asm(YUVTORGB_SETUP
|
asm volatile (
|
||||||
|
YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||||
"1: \n" READYUV422 I4XXTORGB
|
"1: \n" READYUV422 I4XXTORGB
|
||||||
@ -4134,7 +4137,8 @@ void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const int8_t* matrix_argb,
|
const int8_t* matrix_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm("ld1 {v31.16b}, [%[matrix_argb]] \n"
|
asm volatile (
|
||||||
|
"ld1 {v31.16b}, [%[matrix_argb]] \n"
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v0.16b, v1.16b}, [%[src_argb]], #32 \n"
|
"ld1 {v0.16b, v1.16b}, [%[src_argb]], #32 \n"
|
||||||
|
|||||||
@ -8,6 +8,7 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
@ -66,18 +67,24 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
printf("Has SVE2 0x%x\n", has_sve2);
|
printf("Has SVE2 0x%x\n", has_sve2);
|
||||||
printf("Has SME 0x%x\n", has_sme);
|
printf("Has SME 0x%x\n", has_sme);
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
#if __aarch64__
|
||||||
// Read and print the SVE and SME vector lengths.
|
// Read and print the SVE and SME vector lengths.
|
||||||
if (has_sve) {
|
if (has_sve) {
|
||||||
int sve_vl;
|
int sve_vl;
|
||||||
// rdvl x0, #1
|
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||||
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
|
"mov %w[sve_vl], w0 \n"
|
||||||
|
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||||
|
:
|
||||||
|
: "x0");
|
||||||
printf("SVE vector length: %d bytes\n", sve_vl);
|
printf("SVE vector length: %d bytes\n", sve_vl);
|
||||||
}
|
}
|
||||||
if (has_sme) {
|
if (has_sme) {
|
||||||
int sme_vl;
|
int sme_vl;
|
||||||
// rdsvl x0, #1
|
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||||
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
|
"mov %w[sme_vl], w0 \n"
|
||||||
|
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||||
|
:
|
||||||
|
: "x0");
|
||||||
printf("SME vector length: %d bytes\n", sme_vl);
|
printf("SME vector length: %d bytes\n", sme_vl);
|
||||||
}
|
}
|
||||||
#endif // defined(__aarch64__)
|
#endif // defined(__aarch64__)
|
||||||
|
|||||||
16
util/cpuid.c
16
util/cpuid.c
@ -69,18 +69,24 @@ int main(int argc, const char* argv[]) {
|
|||||||
printf("Has SVE2 0x%x\n", has_sve2);
|
printf("Has SVE2 0x%x\n", has_sve2);
|
||||||
printf("Has SME 0x%x\n", has_sme);
|
printf("Has SME 0x%x\n", has_sme);
|
||||||
|
|
||||||
#if defined(__aarch64__)
|
#if __aarch64__
|
||||||
// Read and print the SVE and SME vector lengths.
|
// Read and print the SVE and SME vector lengths.
|
||||||
if (has_sve) {
|
if (has_sve) {
|
||||||
int sve_vl;
|
int sve_vl;
|
||||||
// rdvl x0, #1
|
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||||
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
|
"mov %w[sve_vl], w0 \n"
|
||||||
|
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||||
|
:
|
||||||
|
: "x0");
|
||||||
printf("SVE vector length: %d bytes\n", sve_vl);
|
printf("SVE vector length: %d bytes\n", sve_vl);
|
||||||
}
|
}
|
||||||
if (has_sme) {
|
if (has_sme) {
|
||||||
int sme_vl;
|
int sme_vl;
|
||||||
// rdsvl x0, #1
|
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||||
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
|
"mov %w[sme_vl], w0 \n"
|
||||||
|
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||||
|
:
|
||||||
|
: "x0");
|
||||||
printf("SME vector length: %d bytes\n", sme_vl);
|
printf("SME vector length: %d bytes\n", sme_vl);
|
||||||
}
|
}
|
||||||
#endif // defined(__aarch64__)
|
#endif // defined(__aarch64__)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user