mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
[AArch64] Fix SVE/SME vector length printing in cpuid
Some assemblers treat a semicolon as the start of a comment, so the `mov` that followed it on the same line was discarded and the vector length was reported incorrectly; separate the instructions with a newline instead. Also add volatile to the asm statements in row_gcc and row_neon64. Bug: b/5631539 Change-Id: I6b0836fcdd9247ef7b9e8ceda01df3150519ecf8 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5666060 Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
parent
d32436e8f8
commit
611806a155
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: https://chromium.googlesource.com/libyuv/libyuv/
|
||||
Version: 1890
|
||||
Version: 1891
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
Shipped: yes
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1890
|
||||
#define LIBYUV_VERSION 1891
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -2626,7 +2626,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2686,7 +2686,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
||||
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
||||
@ -2722,7 +2722,7 @@ void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_rgb24,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
|
||||
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
|
||||
@ -2758,7 +2758,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2787,7 +2787,7 @@ void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
|
||||
@ -2822,7 +2822,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2852,7 +2852,7 @@ void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2882,7 +2882,7 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2917,7 +2917,7 @@ void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -2952,7 +2952,7 @@ void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -3045,7 +3045,7 @@ void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -3238,7 +3238,7 @@ void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrlw $14,%%xmm5 \n"
|
||||
@ -3269,7 +3269,7 @@ void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
"psrlw $14,%%xmm5 \n"
|
||||
@ -3301,7 +3301,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
|
||||
uint8_t* dst_rgba,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"pcmpeqb %%xmm5,%%xmm5 \n"
|
||||
@ -3712,7 +3712,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -3746,7 +3746,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -3786,7 +3786,7 @@ void OMITFP I422ToARGBRow_AVX512BW(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX512BW(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n"
|
||||
@ -3825,7 +3825,7 @@ void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
@ -3865,7 +3865,7 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -3900,7 +3900,7 @@ void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -3935,7 +3935,7 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
@ -3975,7 +3975,7 @@ void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
@ -4015,7 +4015,7 @@ void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -4120,7 +4120,7 @@ void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
@ -4228,7 +4228,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
|
||||
uint8_t* dst_argb,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"sub %[u_buf],%[v_buf] \n"
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||
@ -4430,7 +4430,7 @@ void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
||||
@ -4467,7 +4467,7 @@ void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
|
||||
uint8_t* dst_ar30,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
asm (
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP_AVX2(yuvconstants)
|
||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
|
||||
"vpsrlw $14,%%ymm5,%%ymm5 \n"
|
||||
@ -5681,9 +5681,7 @@ void MergeXRGBRow_AVX2(const uint8_t* src_r,
|
||||
const uint8_t* src_b,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm(
|
||||
|
||||
LABELALIGN
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
|
||||
"vmovdqu (%2),%%xmm0 \n" // B
|
||||
@ -7381,7 +7379,7 @@ void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
uintptr_t alpha;
|
||||
asm(
|
||||
asm volatile (
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -7841,7 +7839,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb,
|
||||
const uint8_t* src_argb1,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm(
|
||||
asm volatile (
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -7869,7 +7867,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb,
|
||||
const uint8_t* src_argb1,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm(
|
||||
asm volatile (
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -7897,7 +7895,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
|
||||
const uint8_t* src_argb1,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm(
|
||||
asm volatile (
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -7925,7 +7923,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
|
||||
const uint8_t* src_argb1,
|
||||
uint8_t* dst_argb,
|
||||
int width) {
|
||||
asm(
|
||||
asm volatile (
|
||||
// 4 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -9099,7 +9097,7 @@ void ARGBColorTableRow_X86(uint8_t* dst_argb,
|
||||
const uint8_t* table_argb,
|
||||
int width) {
|
||||
uintptr_t pixel_temp;
|
||||
asm(
|
||||
asm volatile (
|
||||
// 1 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
@ -9132,7 +9130,7 @@ void RGBColorTableRow_X86(uint8_t* dst_argb,
|
||||
const uint8_t* table_argb,
|
||||
int width) {
|
||||
uintptr_t pixel_temp;
|
||||
asm(
|
||||
asm volatile (
|
||||
// 1 pixel loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
|
||||
@ -2805,7 +2805,8 @@ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
asm volatile (
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
@ -2863,7 +2864,8 @@ static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
|
||||
uint8_t* dst_y,
|
||||
int width,
|
||||
const struct RgbConstants* rgbconstants) {
|
||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
asm volatile (
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
@ -2920,7 +2922,8 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
|
||||
7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
|
||||
0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
|
||||
31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
|
||||
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
asm volatile (
|
||||
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
|
||||
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
|
||||
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
|
||||
|
||||
@ -331,7 +331,8 @@ void I212ToAR30Row_NEON(const uint16_t* src_y,
|
||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||
const uint16_t limit = 0x3ff0;
|
||||
asm(YUVTORGB_SETUP
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"dup v22.8h, %w[limit] \n"
|
||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||
"1: \n" READYUV212 NVTORGB
|
||||
@ -400,7 +401,8 @@ void I212ToARGBRow_NEON(const uint16_t* src_y,
|
||||
int width) {
|
||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||
asm(YUVTORGB_SETUP
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"movi v19.8b, #255 \n"
|
||||
"1: \n" READYUV212 NVTORGB RGBTORGB8
|
||||
"subs %w[width], %w[width], #8 \n"
|
||||
@ -449,7 +451,8 @@ void I422ToAR30Row_NEON(const uint8_t* src_y,
|
||||
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
|
||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||
const uint16_t limit = 0x3ff0;
|
||||
asm(YUVTORGB_SETUP
|
||||
asm volatile (
|
||||
YUVTORGB_SETUP
|
||||
"dup v22.8h, %w[limit] \n"
|
||||
"movi v23.8h, #0xc0, lsl #8 \n" // A
|
||||
"1: \n" READYUV422 I4XXTORGB
|
||||
@ -4134,7 +4137,8 @@ void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
const int8_t* matrix_argb,
|
||||
int width) {
|
||||
asm("ld1 {v31.16b}, [%[matrix_argb]] \n"
|
||||
asm volatile (
|
||||
"ld1 {v31.16b}, [%[matrix_argb]] \n"
|
||||
|
||||
"1: \n"
|
||||
"ld1 {v0.16b, v1.16b}, [%[src_argb]], #32 \n"
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -66,18 +67,24 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
||||
printf("Has SVE2 0x%x\n", has_sve2);
|
||||
printf("Has SME 0x%x\n", has_sme);
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#if __aarch64__
|
||||
// Read and print the SVE and SME vector lengths.
|
||||
if (has_sve) {
|
||||
int sve_vl;
|
||||
// rdvl x0, #1
|
||||
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
|
||||
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||
"mov %w[sve_vl], w0 \n"
|
||||
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||
:
|
||||
: "x0");
|
||||
printf("SVE vector length: %d bytes\n", sve_vl);
|
||||
}
|
||||
if (has_sme) {
|
||||
int sme_vl;
|
||||
// rdsvl x0, #1
|
||||
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
|
||||
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||
"mov %w[sme_vl], w0 \n"
|
||||
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||
:
|
||||
: "x0");
|
||||
printf("SME vector length: %d bytes\n", sme_vl);
|
||||
}
|
||||
#endif // defined(__aarch64__)
|
||||
|
||||
16
util/cpuid.c
16
util/cpuid.c
@ -69,18 +69,24 @@ int main(int argc, const char* argv[]) {
|
||||
printf("Has SVE2 0x%x\n", has_sve2);
|
||||
printf("Has SME 0x%x\n", has_sme);
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#if __aarch64__
|
||||
// Read and print the SVE and SME vector lengths.
|
||||
if (has_sve) {
|
||||
int sve_vl;
|
||||
// rdvl x0, #1
|
||||
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
|
||||
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
|
||||
"mov %w[sve_vl], w0 \n"
|
||||
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
|
||||
:
|
||||
: "x0");
|
||||
printf("SVE vector length: %d bytes\n", sve_vl);
|
||||
}
|
||||
if (has_sme) {
|
||||
int sme_vl;
|
||||
// rdsvl x0, #1
|
||||
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
|
||||
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
|
||||
"mov %w[sme_vl], w0 \n"
|
||||
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
|
||||
:
|
||||
: "x0");
|
||||
printf("SME vector length: %d bytes\n", sme_vl);
|
||||
}
|
||||
#endif // defined(__aarch64__)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user