diff --git a/README.chromium b/README.chromium index 1a852d7f8..ac18acfe4 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1890 +Version: 1891 License: BSD License File: LICENSE Shipped: yes diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 761bce145..535a939ac 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1890 +#define LIBYUV_VERSION 1891 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 782382d0c..69babb453 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -2626,7 +2626,7 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2686,7 +2686,7 @@ void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" @@ -2722,7 +2722,7 @@ void OMITFP I444ToRGB24Row_SSSE3(const uint8_t* y_buf, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" @@ -2758,7 +2758,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2787,7 +2787,7 @@ void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants @@ -2822,7 +2822,7 @@ void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2852,7 +2852,7 @@ void OMITFP I212ToARGBRow_SSSE3(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2882,7 +2882,7 @@ void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2917,7 +2917,7 @@ void OMITFP I212ToAR30Row_SSSE3(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -2952,7 +2952,7 @@ void OMITFP I410ToARGBRow_SSSE3(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -3045,7 +3045,7 @@ void OMITFP I410ToAR30Row_SSSE3(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -3238,7 +3238,7 @@ void OMITFP P210ToAR30Row_SSSE3(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $14,%%xmm5 \n" @@ -3269,7 +3269,7 @@ void OMITFP P410ToAR30Row_SSSE3(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $14,%%xmm5 \n" @@ -3301,7 +3301,7 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" @@ -3712,7 +3712,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -3746,7 +3746,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -3786,7 +3786,7 @@ void OMITFP I422ToARGBRow_AVX512BW(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX512BW(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%xmm5,%%xmm5,%%xmm5 \n" @@ -3825,7 +3825,7 @@ void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants @@ -3865,7 +3865,7 @@ void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -3900,7 +3900,7 @@ void OMITFP I212ToARGBRow_AVX2(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -3935,7 +3935,7 @@ void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants @@ -3975,7 +3975,7 @@ void OMITFP I212ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants @@ -4015,7 +4015,7 @@ void OMITFP I410ToARGBRow_AVX2(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -4120,7 +4120,7 @@ void OMITFP I410ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants @@ -4228,7 +4228,7 @@ void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" @@ -4430,7 +4430,7 @@ void OMITFP P210ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpsrlw $14,%%ymm5,%%ymm5 \n" @@ -4467,7 +4467,7 @@ void OMITFP P410ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { - asm ( + asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpsrlw $14,%%ymm5,%%ymm5 \n" @@ -5681,9 +5681,7 @@ void MergeXRGBRow_AVX2(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_argb, int width) { - asm( - - LABELALIGN + asm volatile ( "1: \n" "vmovdqu (%2),%%xmm0 \n" // B @@ -7381,7 +7379,7 @@ void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { uintptr_t alpha; - asm( + asm volatile ( // 4 pixel loop. LABELALIGN "1: \n" @@ -7841,7 +7839,7 @@ void ARGBAddRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm( + asm volatile ( // 4 pixel loop. LABELALIGN "1: \n" @@ -7869,7 +7867,7 @@ void ARGBAddRow_AVX2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm( + asm volatile ( // 4 pixel loop. LABELALIGN "1: \n" @@ -7897,7 +7895,7 @@ void ARGBSubtractRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm( + asm volatile ( // 4 pixel loop. LABELALIGN "1: \n" @@ -7925,7 +7923,7 @@ void ARGBSubtractRow_AVX2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm( + asm volatile ( // 4 pixel loop. LABELALIGN "1: \n" @@ -9099,7 +9097,7 @@ void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { uintptr_t pixel_temp; - asm( + asm volatile ( // 1 pixel loop. LABELALIGN "1: \n" @@ -9132,7 +9130,7 @@ void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { uintptr_t pixel_temp; - asm( + asm volatile ( // 1 pixel loop. LABELALIGN "1: \n" diff --git a/source/row_lsx.cc b/source/row_lsx.cc index 09f206cab..ee74cad9f 100644 --- a/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -2805,7 +2805,8 @@ static void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct RgbConstants* rgbconstants) { - asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + asm volatile ( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants @@ -2863,7 +2864,8 @@ static void RGBAToYMatrixRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width, const struct RgbConstants* rgbconstants) { - asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + asm volatile ( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants @@ -2920,7 +2922,8 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba, 7, 9, 10, 12, 13, 15, 1, 0, 4, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 25, 0, 28, 0, 31, 0, 2, 0, 5, 0, 8, 0, 11, 0, 14, 0}; - asm("vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants + asm volatile ( + "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants "vldrepl.b $vr2, %3, 2 \n\t" // load rgbconstants "vldrepl.h $vr3, %3, 4 \n\t" // load rgbconstants diff --git a/source/row_neon64.cc b/source/row_neon64.cc index 8a4193196..bbe33842c 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -331,7 +331,8 @@ void I212ToAR30Row_NEON(const uint16_t* src_y, const uvec8* uv_coeff = &yuvconstants->kUVCoeff; const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; const uint16_t limit = 0x3ff0; - asm(YUVTORGB_SETUP + asm volatile ( + YUVTORGB_SETUP "dup v22.8h, %w[limit] \n" "movi v23.8h, #0xc0, lsl #8 \n" // A "1: \n" READYUV212 NVTORGB @@ -400,7 +401,8 @@ void I212ToARGBRow_NEON(const uint16_t* src_y, int width) { const uvec8* uv_coeff = &yuvconstants->kUVCoeff; const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; - asm(YUVTORGB_SETUP + asm volatile ( + YUVTORGB_SETUP "movi v19.8b, #255 \n" "1: \n" READYUV212 NVTORGB RGBTORGB8 "subs %w[width], %w[width], #8 \n" @@ -449,7 +451,8 @@ void I422ToAR30Row_NEON(const uint8_t* src_y, const uvec8* uv_coeff = &yuvconstants->kUVCoeff; const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; const uint16_t limit = 0x3ff0; - asm(YUVTORGB_SETUP + asm volatile ( + YUVTORGB_SETUP "dup v22.8h, %w[limit] \n" "movi v23.8h, #0xc0, lsl #8 \n" // A "1: \n" READYUV422 I4XXTORGB @@ -4134,7 +4137,8 @@ void ARGBColorMatrixRow_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { - asm("ld1 {v31.16b}, [%[matrix_argb]] \n" + asm volatile ( + "ld1 {v31.16b}, [%[matrix_argb]] \n" "1: \n" "ld1 {v0.16b, v1.16b}, [%[src_argb]], #32 \n" diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index b551ddd52..ff353bba3 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include #include #include @@ -66,18 +67,24 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); -#if defined(__aarch64__) +#if __aarch64__ // Read and print the SVE and SME vector lengths. if (has_sve) { int sve_vl; - // rdvl x0, #1 - asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0"); + asm(".inst 0x04bf5020 \n" // rdvl x0, #1 + "mov %w[sve_vl], w0 \n" + : [sve_vl] "=r"(sve_vl) // %[sve_vl] + : + : "x0"); printf("SVE vector length: %d bytes\n", sve_vl); } if (has_sme) { int sme_vl; - // rdsvl x0, #1 - asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0"); + asm(".inst 0x04bf5820 \n" // rdsvl x0, #1 + "mov %w[sme_vl], w0 \n" + : [sme_vl] "=r"(sme_vl) // %[sme_vl] + : + : "x0"); printf("SME vector length: %d bytes\n", sme_vl); } #endif // defined(__aarch64__) diff --git a/util/cpuid.c b/util/cpuid.c index 725bc928b..52fb6bd99 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -69,18 +69,24 @@ int main(int argc, const char* argv[]) { printf("Has SVE2 0x%x\n", has_sve2); printf("Has SME 0x%x\n", has_sme); -#if defined(__aarch64__) +#if __aarch64__ // Read and print the SVE and SME vector lengths. if (has_sve) { int sve_vl; - // rdvl x0, #1 - asm(".inst 0x04bf5020; mov %w0, w0" : "=r"(sve_vl)::"x0"); + asm(".inst 0x04bf5020 \n" // rdvl x0, #1 + "mov %w[sve_vl], w0 \n" + : [sve_vl] "=r"(sve_vl) // %[sve_vl] + : + : "x0"); printf("SVE vector length: %d bytes\n", sve_vl); } if (has_sme) { int sme_vl; - // rdsvl x0, #1 - asm(".inst 0x04bf5820; mov %w0, w0" : "=r"(sme_vl)::"x0"); + asm(".inst 0x04bf5820 \n" // rdsvl x0, #1 + "mov %w[sme_vl], w0 \n" + : [sme_vl] "=r"(sme_vl) // %[sme_vl] + : + : "x0"); printf("SME vector length: %d bytes\n", sme_vl); } #endif // defined(__aarch64__)