[AArch64] Fix SVE/SME vector length printing in cpuid

Some assemblers treat a semicolon as the start of a comment, so the
instruction following it in the inline asm string was dropped and the
vector length was reported incorrectly. Separate the two instructions
with a newline instead.

- Mark the inline asm blocks in row_gcc and row_neon64 (and the LSX row
  functions) as volatile

Bug: b/5631539
Change-Id: I6b0836fcdd9247ef7b9e8ceda01df3150519ecf8
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5666060
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
Frank Barchard 2024-07-02 11:34:39 -07:00
parent d32436e8f8
commit 611806a155
7 changed files with 74 additions and 56 deletions

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1890
Version: 1891
License: BSD
License File: LICENSE
Shipped: yes

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1890
#define LIBYUV_VERSION 1891
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -2626,7 +2626,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2686,7 +2686,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
@ -2722,7 +2722,7 @@ void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
"movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
@ -2758,7 +2758,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2787,7 +2787,7 @@ void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants
@ -2822,7 +2822,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2852,7 +2852,7 @@ void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2882,7 +2882,7 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2917,7 +2917,7 @@ void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -2952,7 +2952,7 @@ void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -3045,7 +3045,7 @@ void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -3238,7 +3238,7 @@ void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
@ -3269,7 +3269,7 @@ void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $14,%%xmm5 \n"
@ -3301,7 +3301,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
@ -3712,7 +3712,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -3746,7 +3746,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -3786,7 +3786,7 @@ void OMITFP I422ToARGBRow_AVX512BW(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX512BW(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n"
@ -3825,7 +3825,7 @@ void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
@ -3865,7 +3865,7 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -3900,7 +3900,7 @@ void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -3935,7 +3935,7 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
@ -3975,7 +3975,7 @@ void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
@ -4015,7 +4015,7 @@ void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -4120,7 +4120,7 @@ void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
@ -4228,7 +4228,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
@ -4430,7 +4430,7 @@ void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
@ -4467,7 +4467,7 @@ void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf,
uint8_t* dst_ar30,
const struct YuvConstants* yuvconstants,
int width) {
asm (
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants
"vpsrlw $14,%%ymm5,%%ymm5 \n"
@ -5681,9 +5681,7 @@ void MergeXRGBRow_AVX2(const uint8_t* src_r,
const uint8_t* src_b,
uint8_t* dst_argb,
int width) {
asm(
LABELALIGN
asm volatile (
"1: \n"
"vmovdqu (%2),%%xmm0 \n" // B
@ -7381,7 +7379,7 @@ void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
uintptr_t alpha;
asm(
asm volatile (
// 4 pixel loop.
LABELALIGN
"1: \n"
@ -7841,7 +7839,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
asm(
asm volatile (
// 4 pixel loop.
LABELALIGN
"1: \n"
@ -7869,7 +7867,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
asm(
asm volatile (
// 4 pixel loop.
LABELALIGN
"1: \n"
@ -7897,7 +7895,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
asm(
asm volatile (
// 4 pixel loop.
LABELALIGN
"1: \n"
@ -7925,7 +7923,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width) {
asm(
asm volatile (
// 4 pixel loop.
LABELALIGN
"1: \n"
@ -9099,7 +9097,7 @@ void ARGBColorTableRow_X86(uint8_t* dst_argb,
const uint8_t* table_argb,
int width) {
uintptr_t pixel_temp;
asm(
asm volatile (
// 1 pixel loop.
LABELALIGN
"1: \n"
@ -9132,7 +9130,7 @@ void RGBColorTableRow_X86(uint8_t* dst_argb,
const uint8_t* table_argb,
int width) {
uintptr_t pixel_temp;
asm(
asm volatile (
// 1 pixel loop.
LABELALIGN
"1: \n"

View File

@ -2805,7 +2805,8 @@ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct RgbConstants* rgbconstants) {
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
asm volatile (
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
@ -2863,7 +2864,8 @@ static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba,
uint8_t* dst_y,
int width,
const struct RgbConstants* rgbconstants) {
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
asm volatile (
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants
@ -2920,7 +2922,8 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10,
0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0,
31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0};
asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
asm volatile (
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
"vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants
"vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants

View File

@ -331,7 +331,8 @@ void I212ToAR30Row_NEON(const uint16_t* src_y,
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
const uint16_t limit = 0x3ff0;
asm(YUVTORGB_SETUP
asm volatile (
YUVTORGB_SETUP
"dup v22.8h, %w[limit] \n"
"movi v23.8h, #0xc0, lsl #8 \n" // A
"1: \n" READYUV212 NVTORGB
@ -400,7 +401,8 @@ void I212ToARGBRow_NEON(const uint16_t* src_y,
int width) {
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
asm(YUVTORGB_SETUP
asm volatile (
YUVTORGB_SETUP
"movi v19.8b, #255 \n"
"1: \n" READYUV212 NVTORGB RGBTORGB8
"subs %w[width], %w[width], #8 \n"
@ -449,7 +451,8 @@ void I422ToAR30Row_NEON(const uint8_t* src_y,
const uvec8* uv_coeff = &yuvconstants->kUVCoeff;
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
const uint16_t limit = 0x3ff0;
asm(YUVTORGB_SETUP
asm volatile (
YUVTORGB_SETUP
"dup v22.8h, %w[limit] \n"
"movi v23.8h, #0xc0, lsl #8 \n" // A
"1: \n" READYUV422 I4XXTORGB
@ -4134,7 +4137,8 @@ void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb,
uint8_t* dst_argb,
const int8_t* matrix_argb,
int width) {
asm("ld1 {v31.16b}, [%[matrix_argb]] \n"
asm volatile (
"ld1 {v31.16b}, [%[matrix_argb]] \n"
"1: \n"
"ld1 {v0.16b, v1.16b}, [%[src_argb]], #32 \n"

View File

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -66,18 +67,24 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has SVE2 0x%x\n", has_sve2);
printf("Has SME 0x%x\n", has_sme);
#if defined(__aarch64__)
#if __aarch64__
// Read and print the SVE and SME vector lengths.
if (has_sve) {
int sve_vl;
// rdvl x0, #1
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
"mov %w[sve_vl], w0 \n"
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
:
: "x0");
printf("SVE vector length: %d bytes\n", sve_vl);
}
if (has_sme) {
int sme_vl;
// rdsvl x0, #1
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
"mov %w[sme_vl], w0 \n"
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
:
: "x0");
printf("SME vector length: %d bytes\n", sme_vl);
}
#endif // defined(__aarch64__)

View File

@ -69,18 +69,24 @@ int main(int argc, const char* argv[]) {
printf("Has SVE2 0x%x\n", has_sve2);
printf("Has SME 0x%x\n", has_sme);
#if defined(__aarch64__)
#if __aarch64__
// Read and print the SVE and SME vector lengths.
if (has_sve) {
int sve_vl;
// rdvl x0, #1
asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0");
asm(".inst 0x04bf5020 \n" // rdvl x0, #1
"mov %w[sve_vl], w0 \n"
: [sve_vl] "=r"(sve_vl) // %[sve_vl]
:
: "x0");
printf("SVE vector length: %d bytes\n", sve_vl);
}
if (has_sme) {
int sme_vl;
// rdsvl x0, #1
asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0");
asm(".inst 0x04bf5820 \n" // rdsvl x0, #1
"mov %w[sme_vl], w0 \n"
: [sme_vl] "=r"(sme_vl) // %[sme_vl]
:
: "x0");
printf("SME vector length: %d bytes\n", sme_vl);
}
#endif // defined(__aarch64__)