diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h index 4100170ae..8f528abe3 100644 --- a/include/libyuv/convert_argb.h +++ b/include/libyuv/convert_argb.h @@ -547,7 +547,6 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444, int width, int height); - // Aliases #define AB30ToARGB AR30ToABGR #define AB30ToABGR AR30ToARGB @@ -562,7 +561,6 @@ int AR30ToARGB(const uint8_t* src_ar30, int width, int height); - // Convert AR30 To ABGR. LIBYUV_API int AR30ToABGR(const uint8_t* src_ar30, diff --git a/source/convert_argb.cc b/source/convert_argb.cc index ff91602e4..0f14a89ab 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -512,8 +512,8 @@ int I010ToAB30(const uint16_t* src_y, int dst_stride_ab30, int width, int height) { - return I010ToAR30Matrix(src_y, src_stride_y, src_v, - src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, + return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_ab30, dst_stride_ab30, &kYvuI601Constants, width, height); } @@ -529,8 +529,8 @@ int H010ToAB30(const uint16_t* src_y, int dst_stride_ab30, int width, int height) { - return I010ToAR30Matrix(src_y, src_stride_y, src_v, - src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, + return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_ab30, dst_stride_ab30, &kYvuH709Constants, width, height); } diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 834da76f2..5a6e05376 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -14,7 +14,7 @@ #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/row.h" -#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ +#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ #ifdef __cplusplus namespace libyuv { @@ -29,9 +29,9 @@ static void ARGBTranspose(const uint8_t* src_argb, int height) { int i; int src_pixel_step = src_stride_argb >> 2; - void 
(*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride_argb, - int src_step, uint8_t* dst_argb, int dst_width) = - ScaleARGBRowDownEven_C; + void (*ScaleARGBRowDownEven)( + const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step, + uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2; @@ -75,7 +75,8 @@ void ARGBRotate90(const uint8_t* src_argb, // of the buffer and flip the sign of the source stride. src_argb += src_stride_argb * (height - 1); src_stride_argb = -src_stride_argb; - ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); + ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); } void ARGBRotate270(const uint8_t* src_argb, @@ -89,7 +90,8 @@ void ARGBRotate270(const uint8_t* src_argb, // of the buffer and flip the sign of the destination stride. dst_argb += dst_stride_argb * (width - 1); dst_stride_argb = -dst_stride_argb; - ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); + ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, + height); } void ARGBRotate180(const uint8_t* src_argb, @@ -106,7 +108,8 @@ void ARGBRotate180(const uint8_t* src_argb, int y; void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBMirrorRow_C; - void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = CopyRow_C; + void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = + CopyRow_C; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; @@ -164,7 +167,7 @@ void ARGBRotate180(const uint8_t* src_argb, for (y = 0; y < half_height; ++y) { ARGBMirrorRow(src_argb, row, width); // Mirror first row into a buffer ARGBMirrorRow(src_bot, dst_argb, width); // Mirror last row into first row - CopyRow(row, 
dst_bot, width * 4); // Copy first mirrored row into last + CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last src_argb += src_stride_argb; dst_argb += dst_stride_argb; src_bot -= src_stride_argb; diff --git a/source/row_neon.cc b/source/row_neon.cc index 93a3497d2..3bb83e39d 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -113,9 +113,12 @@ void I444ToARGBRow_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile( + YUVTORGB_SETUP "vmov.u8 d23, #255 \n" - "1: \n" READYUV444 YUVTORGB + "1: \n" + + READYUV444 YUVTORGB "subs %4, %4, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n" "bgt 1b \n" @@ -139,9 +142,12 @@ void I422ToARGBRow_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile( + YUVTORGB_SETUP "vmov.u8 d23, #255 \n" - "1: \n" READYUV422 YUVTORGB + "1: \n" + + READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n" "bgt 1b \n" @@ -166,8 +172,12 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile( + YUVTORGB_SETUP - "1: \n" READYUV422 YUVTORGB + + "1: \n" + + READYUV422 YUVTORGB "subs %5, %5, #8 \n" "vld1.8 {d23}, [%3]! \n" "vst4.8 {d20, d21, d22, d23}, [%4]! \n" @@ -194,7 +204,9 @@ void I422ToRGBARow_NEON(const uint8_t* src_y, int width) { asm volatile( YUVTORGB_SETUP - "1: \n" READYUV422 YUVTORGB + "1: \n" + + READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d19, #255 \n" // d19 modified by // YUVTORGB @@ -220,8 +232,12 @@ void I422ToRGB24Row_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile( + YUVTORGB_SETUP - "1: \n" READYUV422 YUVTORGB + + "1: \n" + + READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vst3.8 {d20, d21, d22}, [%3]! 
\n" "bgt 1b \n" @@ -253,8 +269,13 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y, int width) { asm volatile( YUVTORGB_SETUP - "1: \n" READYUV422 YUVTORGB - "subs %4, %4, #8 \n" ARGBTORGB565 + "1: \n" + + READYUV422 YUVTORGB + + "subs %4, %4, #8 \n" + + ARGBTORGB565 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. "bgt 1b \n" : "+r"(src_y), // %0 @@ -286,10 +307,15 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) { asm volatile( + YUVTORGB_SETUP - "1: \n" READYUV422 YUVTORGB + "1: \n" + + READYUV422 YUVTORGB "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" ARGBTOARGB1555 + "vmov.u8 d23, #255 \n" + + ARGBTOARGB1555 "vst1.8 {q0}, [%3]! \n" // store 8 pixels // ARGB1555. "bgt 1b \n" @@ -325,9 +351,13 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y, YUVTORGB_SETUP "vmov.u8 d4, #0x0f \n" // bits to clear with // vbic. - "1: \n" READYUV422 YUVTORGB + "1: \n" + + READYUV422 YUVTORGB "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" ARGBTOARGB4444 + "vmov.u8 d23, #255 \n" + + ARGBTOARGB4444 "vst1.8 {q0}, [%3]! \n" // store 8 pixels // ARGB4444. "bgt 1b \n" @@ -346,9 +376,12 @@ void I422ToARGB4444Row_NEON(const uint8_t* src_y, void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( + YUVTORGB_SETUP "vmov.u8 d23, #255 \n" - "1: \n" READYUV400 YUVTORGB + "1: \n" + + READYUV400 YUVTORGB "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n" "bgt 1b \n" @@ -385,22 +418,27 @@ void NV12ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READNV12 YUVTORGB - "subs %3, %3, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB] "r"(&yuvconstants->kUVToRB), - [kUVToG] "r"(&yuvconstants->kUVToG), - [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), - [kYToRgb] "r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15"); + asm volatile( + + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + + "1: \n" + + READNV12 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! \n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_uv), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } void NV21ToARGBRow_NEON(const uint8_t* src_y, @@ -408,22 +446,27 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READNV21 YUVTORGB - "subs %3, %3, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_vu), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : [kUVToRB] "r"(&yuvconstants->kUVToRB), - [kUVToG] "r"(&yuvconstants->kUVToG), - [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), - [kYToRgb] "r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15"); + asm volatile( + + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + + "1: \n" + + READNV21 YUVTORGB + "subs %3, %3, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" + "bgt 1b \n" + : "+r"(src_y), // %0 + "+r"(src_vu), // %1 + "+r"(dst_argb), // %2 + "+r"(width) // %3 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } void NV12ToRGB565Row_NEON(const uint8_t* src_y, @@ -433,8 +476,13 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y, int width) { asm volatile( YUVTORGB_SETUP - "1: \n" READNV12 YUVTORGB - "subs %3, %3, #8 \n" ARGBTORGB565 + "1: \n" + + READNV12 YUVTORGB + + "subs %3, %3, #8 \n" + + ARGBTORGB565 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. "bgt 1b \n" : "+r"(src_y), // %0 @@ -453,42 +501,50 @@ void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READYUY2 YUVTORGB - "subs %2, %2, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : [kUVToRB] "r"(&yuvconstants->kUVToRB), - [kUVToG] "r"(&yuvconstants->kUVToG), - [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), - [kYToRgb] "r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15"); + asm volatile( + + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" + + READYUY2 YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" + "bgt 1b \n" + : "+r"(src_yuy2), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { - asm volatile(YUVTORGB_SETUP - "vmov.u8 d23, #255 \n" - "1: \n" READUYVY YUVTORGB - "subs %2, %2, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : [kUVToRB] "r"(&yuvconstants->kUVToRB), - [kUVToG] "r"(&yuvconstants->kUVToG), - [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), - [kYToRgb] "r"(&yuvconstants->kYToRgb) - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15"); + asm volatile( + + YUVTORGB_SETUP + "vmov.u8 d23, #255 \n" + "1: \n" + + READUYVY YUVTORGB + "subs %2, %2, #8 \n" + "vst4.8 {d20, d21, d22, d23}, [%1]! \n" + "bgt 1b \n" + : "+r"(src_uyvy), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : [kUVToRB] "r"(&yuvconstants->kUVToRB), + [kUVToG] "r"(&yuvconstants->kUVToG), + [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), + [kYToRgb] "r"(&yuvconstants->kYToRgb) + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index c0daac155..fd02c7929 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -526,8 +526,8 @@ TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2) } \ double time2 = get_time(); \ printf(" %8d us C - %8d us OPT\n", \ - static_cast<int>((time1 - time0) * 1e6), \ - static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \ + static_cast<int>((time1 - time0) * 1e6), \ + static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \ int max_diff = 0; \ /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h index dee3952fd..87907fa16 100644 --- a/unit_test/unit_test.h +++ b/unit_test/unit_test.h @@ -126,9 +126,9 @@ class LibYUVColorTest : public ::testing::Test { protected: LibYUVColorTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -138,9 +138,9 @@ class LibYUVConvertTest : public ::testing::Test { protected: LibYUVConvertTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. 
+ int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -150,9 +150,9 @@ class LibYUVScaleTest : public ::testing::Test { protected: LibYUVScaleTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -162,9 +162,9 @@ class LibYUVRotateTest : public ::testing::Test { protected: LibYUVRotateTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -174,9 +174,9 @@ class LibYUVPlanarTest : public ::testing::Test { protected: LibYUVPlanarTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. 
Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -186,9 +186,9 @@ class LibYUVBaseTest : public ::testing::Test { protected: LibYUVBaseTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD. @@ -198,9 +198,9 @@ class LibYUVCompareTest : public ::testing::Test { protected: LibYUVCompareTest(); - int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. - int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. - int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. + int benchmark_iterations_; // Default 1. Use 1000 for benchmarking. + int benchmark_width_; // Default 1280. Use 640 for benchmarking VGA. + int benchmark_height_; // Default 720. Use 360 for benchmarking VGA. int benchmark_pixels_div1280_; // Total pixels to benchmark / 1280. int disable_cpu_flags_; // Default 1. Use -1 for benchmarking. int benchmark_cpu_info_; // Default -1. Use 1 to disable SIMD.