diff --git a/README.chromium b/README.chromium
index 5074954c5..b0ed0b964 100644
--- a/README.chromium
+++ b/README.chromium
@@ -5,4 +5,4 @@ License: BSD
 License File: LICENSE
 
 Description:
-libyuv is an open source project that includes YUV conversion and scaling functionality.
+libyuv is an open source project that includes YUV conversion and scaling functionality.
\ No newline at end of file
diff --git a/include/libyuv/convert_argb.h b/include/libyuv/convert_argb.h
index eb4ebd54a..a655e5b90 100644
--- a/include/libyuv/convert_argb.h
+++ b/include/libyuv/convert_argb.h
@@ -1864,7 +1864,7 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
                      int width,
                      int height);
 
-// Convert I422 to RGBA with matrix.
+// Convert I420 to RGBA with matrix.
 LIBYUV_API
 int I420ToRGBAMatrix(const uint8_t* src_y,
                      int src_stride_y,
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index acb50f9e6..fb6831cf3 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1810
+#define LIBYUV_VERSION 1811
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_
\ No newline at end of file
diff --git a/source/rotate.cc b/source/rotate.cc
index 939e305ca..b274e8dba 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -29,10 +29,7 @@ void TransposePlane(const uint8_t* src,
                     int width,
                     int height) {
   int i = height;
-#if defined(HAS_TRANSPOSEWX16_MSA)
-  void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
-                        int dst_stride, int width) = TransposeWx16_C;
-#elif defined(HAS_TRANSPOSEWX16_LSX)
+#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
   void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
                         int dst_stride, int width) = TransposeWx16_C;
 #else
@@ -40,24 +37,12 @@ void TransposePlane(const uint8_t* src,
                        int dst_stride, int width) = TransposeWx8_C;
 #endif
 
-#if defined(HAS_TRANSPOSEWX16_MSA)
-  if (TestCpuFlag(kCpuHasMSA)) {
-    TransposeWx16 = TransposeWx16_Any_MSA;
-    if (IS_ALIGNED(width, 16)) {
-      TransposeWx16 = TransposeWx16_MSA;
-    }
-  }
-#elif defined(HAS_TRANSPOSEWX16_LSX)
-  if (TestCpuFlag(kCpuHasLSX)) {
-    TransposeWx16 = TransposeWx16_Any_LSX;
-    if (IS_ALIGNED(width, 16)) {
-      TransposeWx16 = TransposeWx16_LSX;
-    }
-  }
-#else
 #if defined(HAS_TRANSPOSEWX8_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
-    TransposeWx8 = TransposeWx8_NEON;
+    TransposeWx8 = TransposeWx8_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      TransposeWx8 = TransposeWx8_NEON;
+    }
   }
 #endif
 #if defined(HAS_TRANSPOSEWX8_SSSE3)
@@ -76,17 +61,24 @@ void TransposePlane(const uint8_t* src,
     }
   }
 #endif
-#endif /* defined(HAS_TRANSPOSEWX16_MSA) */
-
 #if defined(HAS_TRANSPOSEWX16_MSA)
-  // Work across the source in 16x16 tiles
-  while (i >= 16) {
-    TransposeWx16(src, src_stride, dst, dst_stride, width);
-    src += 16 * src_stride;  // Go down 16 rows.
-    dst += 16;               // Move over 16 columns.
-    i -= 16;
+  if (TestCpuFlag(kCpuHasMSA)) {
+    TransposeWx16 = TransposeWx16_Any_MSA;
+    if (IS_ALIGNED(width, 16)) {
+      TransposeWx16 = TransposeWx16_MSA;
+    }
   }
-#elif defined(HAS_TRANSPOSEWX16_LSX)
+#endif
+#if defined(HAS_TRANSPOSEWX16_LSX)
+  if (TestCpuFlag(kCpuHasLSX)) {
+    TransposeWx16 = TransposeWx16_Any_LSX;
+    if (IS_ALIGNED(width, 16)) {
+      TransposeWx16 = TransposeWx16_LSX;
+    }
+  }
+#endif
+
+#if defined(HAS_TRANSPOSEWX16_MSA) || defined(HAS_TRANSPOSEWX16_LSX)
   // Work across the source in 16x16 tiles
   while (i >= 16) {
     TransposeWx16(src, src_stride, dst, dst_stride, width);
diff --git a/source/row_msa.cc b/source/row_msa.cc
index 16d3f8390..ae2c2f32d 100644
--- a/source/row_msa.cc
+++ b/source/row_msa.cc
@@ -24,14 +24,14 @@ extern "C" {
 #define ALPHA_VAL (-1)
 
 // Fill YUV -> RGB conversion constants into vectors
-#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, yb)   \
-  {                                                        \
-    ub = __msa_fill_w(yuvconst->kUVToB[0]);                \
-    vr = __msa_fill_w(yuvconst->kUVToR[1]);                \
-    ug = __msa_fill_w(yuvconst->kUVToG[0]);                \
-    vg = __msa_fill_w(yuvconst->kUVToG[1]);                \
-    yg = __msa_fill_w(yuvconst->kYToRgb[0]);               \
-    yb = __msa_fill_w(yuvconst->kYBiasToRgb[0]);           \
+#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, yb) \
+  {                                                      \
+    ub = __msa_fill_w(yuvconst->kUVToB[0]);              \
+    vr = __msa_fill_w(yuvconst->kUVToR[1]);              \
+    ug = __msa_fill_w(yuvconst->kUVToG[0]);              \
+    vg = __msa_fill_w(yuvconst->kUVToG[1]);              \
+    yg = __msa_fill_w(yuvconst->kYToRgb[0]);             \
+    yb = __msa_fill_w(yuvconst->kYBiasToRgb[0]);         \
   }
 
 // Load YUV 422 pixel data
@@ -68,50 +68,50 @@ extern "C" {
 }
 
 // Convert 8 pixels of YUV 420 to RGB.
-#define YUVTORGB(in_y, in_uv, ubvr, ugvg, yg, yb, out_b, out_g, out_r)   \
-  {                                                                      \
-    v8i16 vec0_m, vec1_m;                                                \
-    v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m;                        \
-    v4i32 reg5_m, reg6_m, reg7_m;                                        \
-    v16i8 zero_m = {0};                                                  \
-                                                                         \
-    vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y);              \
-    vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv);           \
-    reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m);          \
-    reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m);          \
-    reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m);          \
-    reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m);          \
-    reg0_m *= yg;                                                        \
-    reg1_m *= yg;                                                        \
-    reg2_m *= ubvr;                                                      \
-    reg3_m *= ubvr;                                                      \
-    reg0_m = __msa_srai_w(reg0_m, 16);                                   \
-    reg1_m = __msa_srai_w(reg1_m, 16);                                   \
-    reg0_m += yb;                                                        \
-    reg1_m += yb;                                                        \
-    reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg);                 \
-    reg5_m = __msa_ilvev_w(reg2_m, reg2_m);                              \
-    reg6_m = __msa_ilvev_w(reg3_m, reg3_m);                              \
-    reg7_m = __msa_ilvr_w(reg4_m, reg4_m);                               \
-    reg2_m = __msa_ilvod_w(reg2_m, reg2_m);                              \
-    reg3_m = __msa_ilvod_w(reg3_m, reg3_m);                              \
-    reg4_m = __msa_ilvl_w(reg4_m, reg4_m);                               \
-    reg5_m = reg0_m - reg5_m;                                            \
-    reg6_m = reg1_m - reg6_m;                                            \
-    reg2_m = reg0_m - reg2_m;                                            \
-    reg3_m = reg1_m - reg3_m;                                            \
-    reg7_m = reg0_m - reg7_m;                                            \
-    reg4_m = reg1_m - reg4_m;                                            \
-    reg5_m = __msa_srai_w(reg5_m, 6);                                    \
-    reg6_m = __msa_srai_w(reg6_m, 6);                                    \
-    reg7_m = __msa_srai_w(reg7_m, 6);                                    \
-    reg4_m = __msa_srai_w(reg4_m, 6);                                    \
-    reg2_m = __msa_srai_w(reg2_m, 6);                                    \
-    reg3_m = __msa_srai_w(reg3_m, 6);                                    \
-    CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m);         \
-    out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m);                 \
-    out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m);                 \
-    out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m);                 \
+#define YUVTORGB(in_y, in_uv, ubvr, ugvg, yg, yb, out_b, out_g, out_r) \
+  {                                                                    \
+    v8i16 vec0_m, vec1_m;                                              \
+    v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m;                      \
+    v4i32 reg5_m, reg6_m, reg7_m;                                      \
+    v16i8 zero_m = {0};                                                \
+                                                                       \
+    vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y);            \
+    vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv);         \
+    reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m);        \
+    reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m);        \
+    reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m);        \
+    reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m);        \
+    reg0_m *= yg;                                                      \
+    reg1_m *= yg;                                                      \
+    reg2_m *= ubvr;                                                    \
+    reg3_m *= ubvr;                                                    \
+    reg0_m = __msa_srai_w(reg0_m, 16);                                 \
+    reg1_m = __msa_srai_w(reg1_m, 16);                                 \
+    reg0_m += yb;                                                      \
+    reg1_m += yb;                                                      \
+    reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg);               \
+    reg5_m = __msa_ilvev_w(reg2_m, reg2_m);                            \
+    reg6_m = __msa_ilvev_w(reg3_m, reg3_m);                            \
+    reg7_m = __msa_ilvr_w(reg4_m, reg4_m);                             \
+    reg2_m = __msa_ilvod_w(reg2_m, reg2_m);                            \
+    reg3_m = __msa_ilvod_w(reg3_m, reg3_m);                            \
+    reg4_m = __msa_ilvl_w(reg4_m, reg4_m);                             \
+    reg5_m = reg0_m - reg5_m;                                          \
+    reg6_m = reg1_m - reg6_m;                                          \
+    reg2_m = reg0_m - reg2_m;                                          \
+    reg3_m = reg1_m - reg3_m;                                          \
+    reg7_m = reg0_m - reg7_m;                                          \
+    reg4_m = reg1_m - reg4_m;                                          \
+    reg5_m = __msa_srai_w(reg5_m, 6);                                  \
+    reg6_m = __msa_srai_w(reg6_m, 6);                                  \
+    reg7_m = __msa_srai_w(reg7_m, 6);                                  \
+    reg4_m = __msa_srai_w(reg4_m, 6);                                  \
+    reg2_m = __msa_srai_w(reg2_m, 6);                                  \
+    reg3_m = __msa_srai_w(reg3_m, 6);                                  \
+    CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m);       \
+    out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m);               \
+    out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m);               \
+    out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m);               \
   }
 
 // Pack and Store 8 ARGB values.
@@ -278,32 +278,32 @@ extern "C" {
     out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m);      \
   }
 
-#define RGBTOUV(_tmpb, _tmpg, _tmpr, _nexb, _nexg, _nexr, _dst0)   \
-  {                                                                \
-    v16u8 _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5;                \
-    v8i16 _reg0, _reg1, _reg2, _reg3, _reg4, _reg5;                \
-    _tmp0 = (v16u8)__msa_ilvev_b(_tmpb, _nexb);                    \
-    _tmp1 = (v16u8)__msa_ilvod_b(_tmpb, _nexb);                    \
-    _tmp2 = (v16u8)__msa_ilvev_b(_tmpg, _nexg);                    \
-    _tmp3 = (v16u8)__msa_ilvod_b(_tmpg, _nexg);                    \
-    _tmp4 = (v16u8)__msa_ilvev_b(_tmpr, _nexr);                    \
-    _tmp5 = (v16u8)__msa_ilvod_b(_tmpr, _nexr);                    \
-    _reg0 = (v8i16)__msa_hadd_u_h(_tmp0, _tmp0);                   \
-    _reg1 = (v8i16)__msa_hadd_u_h(_tmp1, _tmp1);                   \
-    _reg2 = (v8i16)__msa_hadd_u_h(_tmp2, _tmp2);                   \
-    _reg3 = (v8i16)__msa_hadd_u_h(_tmp3, _tmp3);                   \
-    _reg4 = (v8i16)__msa_hadd_u_h(_tmp4, _tmp4);                   \
-    _reg5 = (v8i16)__msa_hadd_u_h(_tmp5, _tmp5);                   \
-    _reg0 = (v8i16)__msa_aver_u_h(_reg0, _reg1);                   \
-    _reg2 = (v8i16)__msa_aver_u_h(_reg2, _reg3);                   \
-    _reg4 = (v8i16)__msa_aver_u_h(_reg4, _reg5);                   \
-    _reg1 = (v8i16)__msa_maddv_h(const_112, _reg0, const_8080);    \
-    _reg3 = (v8i16)__msa_maddv_h(const_112, _reg4, const_8080);    \
-    _reg1 = (v8i16)__msa_msubv_h(_reg1, const_74, _reg2);          \
-    _reg3 = (v8i16)__msa_msubv_h(_reg3, const_94, _reg2);          \
-    _reg1 = (v8i16)__msa_msubv_h(_reg1, const_38, _reg4);          \
-    _reg3 = (v8i16)__msa_msubv_h(_reg3, const_18, _reg0);          \
-    _dst0 = (v16u8)__msa_pckod_b(_reg3, _reg1);                    \
+#define RGBTOUV(_tmpb, _tmpg, _tmpr, _nexb, _nexg, _nexr, _dst0) \
+  {                                                              \
+    v16u8 _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5;              \
+    v8i16 _reg0, _reg1, _reg2, _reg3, _reg4, _reg5;              \
+    _tmp0 = (v16u8)__msa_ilvev_b(_tmpb, _nexb);                  \
+    _tmp1 = (v16u8)__msa_ilvod_b(_tmpb, _nexb);                  \
+    _tmp2 = (v16u8)__msa_ilvev_b(_tmpg, _nexg);                  \
+    _tmp3 = (v16u8)__msa_ilvod_b(_tmpg, _nexg);                  \
+    _tmp4 = (v16u8)__msa_ilvev_b(_tmpr, _nexr);                  \
+    _tmp5 = (v16u8)__msa_ilvod_b(_tmpr, _nexr);                  \
+    _reg0 = (v8i16)__msa_hadd_u_h(_tmp0, _tmp0);                 \
+    _reg1 = (v8i16)__msa_hadd_u_h(_tmp1, _tmp1);                 \
+    _reg2 = (v8i16)__msa_hadd_u_h(_tmp2, _tmp2);                 \
+    _reg3 = (v8i16)__msa_hadd_u_h(_tmp3, _tmp3);                 \
+    _reg4 = (v8i16)__msa_hadd_u_h(_tmp4, _tmp4);                 \
+    _reg5 = (v8i16)__msa_hadd_u_h(_tmp5, _tmp5);                 \
+    _reg0 = (v8i16)__msa_aver_u_h(_reg0, _reg1);                 \
+    _reg2 = (v8i16)__msa_aver_u_h(_reg2, _reg3);                 \
+    _reg4 = (v8i16)__msa_aver_u_h(_reg4, _reg5);                 \
+    _reg1 = (v8i16)__msa_maddv_h(const_112, _reg0, const_8080);  \
+    _reg3 = (v8i16)__msa_maddv_h(const_112, _reg4, const_8080);  \
+    _reg1 = (v8i16)__msa_msubv_h(_reg1, const_74, _reg2);        \
+    _reg3 = (v8i16)__msa_msubv_h(_reg3, const_94, _reg2);        \
+    _reg1 = (v8i16)__msa_msubv_h(_reg1, const_38, _reg4);        \
+    _reg3 = (v8i16)__msa_msubv_h(_reg3, const_18, _reg0);        \
+    _dst0 = (v16u8)__msa_pckod_b(_reg3, _reg1);                  \
   }
 
 void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
@@ -1687,9 +1687,9 @@ void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
   v16u8 reg0, reg1, reg2, dst;
   v8i16 tmpr_l, tmpr_r, tmpg_l, tmpg_r, tmpb_l, tmpb_r;
   v8i16 res0, res1;
-  v8i16 const_66  = (v8i16)__msa_ldi_h(66);
+  v8i16 const_66 = (v8i16)__msa_ldi_h(66);
   v8i16 const_129 = (v8i16)__msa_ldi_h(129);
-  v8i16 const_25  = (v8i16)__msa_ldi_h(25);
+  v8i16 const_25 = (v8i16)__msa_ldi_h(25);
   v8u16 const_1080 = (v8u16)__msa_fill_h(0x1080);
   v16u8 zero = (v16u8)__msa_ldi_b(0);
 
@@ -1726,7 +1726,7 @@ void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
     res1 = (v8i16)__msa_maddv_h(tmpg_l, const_129, res1);
     res0 = (v8i16)__msa_maddv_h(tmpr_r, const_66, res0);
     res1 = (v8i16)__msa_maddv_h(tmpr_l, const_66, res1);
-    dst  = (v16u8)__msa_pckod_b(res1, res0);
+    dst = (v16u8)__msa_pckod_b(res1, res0);
     ST_UB(dst, dst_y);
     src_argb1555 += 32;
     dst_y += 16;
@@ -1739,9 +1739,9 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
   v16u8 reg0, reg1, dst;
   v8i16 tmpr_l, tmpr_r, tmpg_l, tmpg_r, tmpb_l, tmpb_r;
   v8i16 res0, res1;
-  v8i16 const_66  = (v8i16)__msa_ldi_h(66);
+  v8i16 const_66 = (v8i16)__msa_ldi_h(66);
   v8i16 const_129 = (v8i16)__msa_ldi_h(129);
-  v8i16 const_25  = (v8i16)__msa_ldi_h(25);
+  v8i16 const_25 = (v8i16)__msa_ldi_h(25);
   v8i16 const_1080 = (v8i16)__msa_fill_h(0x1080);
   v16u8 zero = __msa_ldi_b(0);
 
@@ -1776,7 +1776,7 @@ void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
     res1 = (v8i16)__msa_maddv_h(tmpg_l, const_129, res1);
     res0 = (v8i16)__msa_maddv_h(tmpr_r, const_66, res0);
     res1 = (v8i16)__msa_maddv_h(tmpr_l, const_66, res1);
-    dst  = (v16u8)__msa_pckod_b(res1, res0);
+    dst = (v16u8)__msa_pckod_b(res1, res0);
     ST_UB(dst, dst_y);
     src_rgb565 += 32;
     dst_y += 16;
@@ -1879,10 +1879,10 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,
   v16u8 reg0, reg1, reg2, reg3;
   v16u8 tmpb, tmpg, tmpr, nexb, nexg, nexr;
   v8i16 const_112 = (v8i16)__msa_ldi_h(0x38);
-  v8i16 const_74  = (v8i16)__msa_ldi_h(0x25);
-  v8i16 const_38  = (v8i16)__msa_ldi_h(0x13);
-  v8i16 const_94  = (v8i16)__msa_ldi_h(0x2F);
-  v8i16 const_18  = (v8i16)__msa_ldi_h(0x09);
+  v8i16 const_74 = (v8i16)__msa_ldi_h(0x25);
+  v8i16 const_38 = (v8i16)__msa_ldi_h(0x13);
+  v8i16 const_94 = (v8i16)__msa_ldi_h(0x2F);
+  v8i16 const_18 = (v8i16)__msa_ldi_h(0x09);
   v8u16 const_8080 = (v8u16)__msa_fill_h(0x8080);
 
   for (x = 0; x < width; x += 16) {
@@ -1952,10 +1952,10 @@ void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,
   v16u8 reg0, reg1, reg2, reg3;
   v16u8 tmpb, tmpg, tmpr, nexb, nexg, nexr;
   v8i16 const_112 = (v8i16)__msa_ldi_h(0x38);
-  v8i16 const_74  = (v8i16)__msa_ldi_h(0x25);
-  v8i16 const_38  = (v8i16)__msa_ldi_h(0x13);
-  v8i16 const_94  = (v8i16)__msa_ldi_h(0x2F);
-  v8i16 const_18  = (v8i16)__msa_ldi_h(0x09);
+  v8i16 const_74 = (v8i16)__msa_ldi_h(0x25);
+  v8i16 const_38 = (v8i16)__msa_ldi_h(0x13);
+  v8i16 const_94 = (v8i16)__msa_ldi_h(0x2F);
+  v8i16 const_18 = (v8i16)__msa_ldi_h(0x09);
   v8u16 const_8080 = (v8u16)__msa_fill_h(0x8080);
 
   for (x = 0; x < width; x += 16) {
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 4781e2f6a..e10632364 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -585,11 +585,11 @@ void DetileRow_NEON(const uint8_t* src,
                     int width) {
   asm volatile(
       "1:                                        \n"
-      "vld1.16     {q0}, [%0], %3            \n"  // load 16 bytes
-      "subs        %2, %2, #16               \n"  // 16 processed per loop
-      "pld         [%0, 1792]                \n"
-      "vst1.16     {q0}, [%1]!               \n"  // store 16 bytes
-      "bgt         1b                        \n"
+      "vld1.16     {q0}, [%0], %3                \n"  // load 16 bytes
+      "subs        %2, %2, #16                   \n"  // 16 processed per loop
+      "pld         [%0, 1792]                    \n"
+      "vst1.16     {q0}, [%1]!                   \n"  // store 16 bytes
+      "bgt         1b                            \n"
       : "+r"(src),    // %0
         "+r"(dst),    // %1
         "+r"(width)   // %2
@@ -608,7 +608,7 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
       "1:                                        \n"
       "vld2.8      {d0, d1}, [%0], %4            \n"
      "subs        %3, %3, #16                   \n"
-      "pld         [%0, 1792]                   \n"
+      "pld         [%0, 1792]                    \n"
       "vst1.8      {d0}, [%1]!                   \n"
       "vst1.8      {d1}, [%2]!                   \n"
       "bgt         1b                            \n"
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 442e60cdc..7139ead72 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -637,7 +637,7 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
       "1:                                        \n"
       "ld2         {v0.8b,v1.8b}, [%0], %4       \n"
       "subs        %w3, %w3, #16                 \n"
-      "prfm        pldl1keep, [%0, 1792]        \n"
+      "prfm        pldl1keep, [%0, 1792]         \n"
       "st1         {v0.8b}, [%1], #8             \n"
       "st1         {v1.8b}, [%2], #8             \n"
       "b.gt        1b                            \n"
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 20ac91c83..df3dce197 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -433,7 +433,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
                 "DST_SUBSAMP_Y unsupported");                                \
   const int kWidth = W1280;                                                  \
   const int kHeight = benchmark_height_;                                     \
-  const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                  \
+  const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X);                \
   const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X);                \
   const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y);              \
   const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1);  \
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index 118259876..1e61e5c74 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1503,15 +1503,15 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) {
   // Disable all optimizations.
   MaskCpuFlags(disable_cpu_flags_);
   for (j = 0; j < benchmark_iterations_; j++) {
-    DetilePlane(orig_y, orig_width, dst_c, benchmark_width_,
-                benchmark_width_, benchmark_height_, 16);
+    DetilePlane(orig_y, orig_width, dst_c, benchmark_width_, benchmark_width_,
+                benchmark_height_, 16);
   }
 
   // Enable optimizations.
   MaskCpuFlags(benchmark_cpu_info_);
   for (j = 0; j < benchmark_iterations_; j++) {
-    DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_,
-                benchmark_width_, benchmark_height_, 16);
+    DetilePlane(orig_y, orig_width, dst_opt, benchmark_width_, benchmark_width_,
+                benchmark_height_, 16);
   }
 
   for (i = 0; i < y_plane_size; ++i) {
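
Note on the source/rotate.cc hunk: TransposePlane previously selected
TransposeWx8_NEON unconditionally once NEON was detected, even for widths that
are not a multiple of 8; the patch selects TransposeWx8_Any_NEON first (SIMD
for the bulk of the row, scalar fallback for the tail) and only upgrades to
the pure NEON kernel when IS_ALIGNED(width, 8) holds. A minimal C++ sketch of
that selection pattern follows. It is not libyuv code: ChooseTransposeWx8,
HasNeon, and the empty stub kernels are hypothetical stand-ins (HasNeon
approximates libyuv's runtime TestCpuFlag(kCpuHasNEON) with a compile-time
check); the IS_ALIGNED macro matches libyuv's definition.

#include <stdint.h>

// Same test libyuv's IS_ALIGNED macro performs: value is a multiple of 'a'
// (for 'a' a power of two).
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))

typedef void (*TransposeWx8Fn)(const uint8_t* src, int src_stride,
                               uint8_t* dst, int dst_stride, int width);

// Hypothetical stubs standing in for the real row kernels.
static void TransposeWx8_C(const uint8_t*, int, uint8_t*, int, int) {}
static void TransposeWx8_Any_NEON(const uint8_t*, int, uint8_t*, int, int) {}
static void TransposeWx8_NEON(const uint8_t*, int, uint8_t*, int, int) {}

// Stand-in for TestCpuFlag(kCpuHasNEON); compile-time instead of runtime.
static bool HasNeon() {
#if defined(__ARM_NEON__) || defined(__aarch64__)
  return true;
#else
  return false;
#endif
}

// The dispatch pattern from the patch: start at the C fallback, take the Any
// variant when the CPU allows it (correct for every width), and take the
// unconditional SIMD kernel only when the width fills whole 8-wide tiles.
static TransposeWx8Fn ChooseTransposeWx8(int width) {
  TransposeWx8Fn fn = TransposeWx8_C;
  if (HasNeon()) {
    fn = TransposeWx8_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      fn = TransposeWx8_NEON;
    }
  }
  return fn;
}

The MSA and LSX TransposeWx16 blocks rearranged by the same hunk follow the
identical shape with a 16-wide tile, which is what lets the patch share one
16x16 tile loop under defined(HAS_TRANSPOSEWX16_MSA) ||
defined(HAS_TRANSPOSEWX16_LSX) instead of duplicating it per architecture.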