diff --git a/README.chromium b/README.chromium index 34e2a98d3..a6a69907b 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1309 +Version: 1310 License: BSD License File: LICENSE diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index c592fc235..e36b4dfa2 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -61,12 +61,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height); -// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes). -// Values in dither matrix from 0 to 255. 128 is best for no dither. +// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). +// Dither matrix values from 0 to 7 are recommended. +// The first byte of the dither matrix is the upper left value. +// TODO(fbarchard): Consider pointer to 2d array for dither4x4. +// const uint8(*dither)[4][4]; LIBYUV_API int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither8x8, int width, int height); + const uint8* dither4x4, int width, int height); // Convert ARGB To ARGB1555. 
LIBYUV_API diff --git a/include/libyuv/row.h b/include/libyuv/row.h index ac9c17fe8..dbe882dba 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -910,11 +910,11 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix); + const uint32 dither4, int pix); void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix); + const uint32 dither4, int pix); void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix); + const uint32 dither4, int pix); void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); @@ -1384,9 +1384,9 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix); + const uint32 dither4, int pix); void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix); + const uint32 dither4, int pix); void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 5aef5ba07..027e34a3b 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1309 +#define LIBYUV_VERSION 1310 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index c0e784b66..c27e2118b 100644 --- 
a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -805,25 +805,21 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. -static const uint8 kDither565_8x8[64] = { - 0 >> 5, 128 >> 5, 32 >> 5, 160 >> 5, 8 >> 5, 136 >> 5, 40 >> 5, 168 >> 5, - 192 >> 5, 64 >> 5, 224 >> 5, 96 >> 5, 200 >> 5, 72 >> 5, 232 >> 5, 104 >> 5, - 48 >> 5, 176 >> 5, 16 >> 5, 144 >> 5, 56 >> 5, 184 >> 5, 24 >> 5, 152 >> 5, - 240 >> 5, 112 >> 5, 208 >> 5, 80 >> 5, 248 >> 5, 120 >> 5, 216 >> 5, 88 >> 5, - 12 >> 5, 140 >> 5, 44 >> 5, 172 >> 5, 4 >> 5, 132 >> 5, 36 >> 5, 164 >> 5, - 204 >> 5, 76 >> 5, 236 >> 5, 108 >> 5, 196 >> 5, 68 >> 5, 228 >> 5, 100 >> 5, - 60 >> 5, 188 >> 5, 28 >> 5, 156 >> 5, 52 >> 5, 180 >> 5, 20 >> 5, 148 >> 5, - 252 >> 5, 124 >> 5, 220 >> 5, 92 >> 5, 244 >> 5, 116 >> 5, 212 >> 5, 84 >> 5, +static const uint8 kDither565_4x4[16] = { + 0, 4, 1, 5, + 6, 2, 7, 3, + 1, 5, 0, 4, + 7, 3, 6, 2, }; -// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes). +// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). 
LIBYUV_API int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither8x8, int width, int height) { + const uint8* dither4x4, int width, int height) { int y; void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8x8, int pix) = ARGBToRGB565DitherRow_C; + const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } @@ -832,13 +828,13 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } - if (!dither8x8) { - dither8x8 = kDither565_8x8; + if (!dither4x4) { + dither4x4 = kDither565_4x4; } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 4)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2; } } @@ -853,7 +849,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, #endif for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, - dither8x8 + ((y & 7) << 3), width); + *(uint32*)(dither4x4 + ((y & 3) << 2)), width); src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; } diff --git a/source/row_any.cc b/source/row_any.cc index d7dcca854..c44d0b7a1 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -227,17 +227,17 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 2, 4, 7) #define RGBDANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src, uint8* dst, \ - const uint8* dither8x8, int width) { \ + const uint32 dither4, int width) { \ int n = width & ~MASK; \ if (n > 0) { \ - ARGBTORGB_SIMD(src, dst, dither8x8, n); \ + ARGBTORGB_SIMD(src, dst, dither4, n); \ } \ - ARGBTORGB_C(src + n * SBPP, dst + n * BPP, dither8x8, width & MASK); \ + ARGBTORGB_C(src + 
n * SBPP, dst + n * BPP, dither4, width & MASK); \ } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, - ARGBToRGB565DitherRow_C, 4, 2, 7) + ARGBToRGB565DitherRow_C, 4, 2, 3) #endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, diff --git a/source/row_common.cc b/source/row_common.cc index cd881351c..db8c41651 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -199,12 +199,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { } } +// dither4 is a row of 4 values from the 4x4 dither matrix. +// The 4x4 matrix contains values that are added to RGB. When converting to +// fewer bits (565) this provides an ordered dither. +// The first byte of the 4x4 matrix is the upper left value. +// The 4 values are passed as an int, then referenced as an array, so +// endianness does not affect the order of the original matrix. But dither4 +// will contain the first pixel in the lower byte for little endian +// or the upper byte for big endian. 
void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8x8, int width) { + const uint32 dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { - int dither0 = dither8x8[x & 7]; - int dither1 = dither8x8[(x + 1) & 7]; + int dither0 = ((unsigned char*)(&dither4))[x & 3]; + int dither1 = ((unsigned char*)(&dither4))[(x + 1) & 3]; uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; @@ -217,7 +225,7 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, src_argb += 8; } if (width & 1) { - int dither0 = dither8x8[(width - 1) & 7]; + int dither0 = ((unsigned char*)(&dither4))[(width - 1) & 3]; uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; diff --git a/source/row_win.cc b/source/row_win.cc index c87f813df..22c654070 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -626,18 +626,17 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { // 8 pixels __declspec(naked) __declspec(align(16)) void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix) { + const uint32 dither4, int pix) { __asm { - mov eax, [esp + 12] // dither8 - movq xmm6, qword ptr [eax] // fetch 8 dither values - punpcklbw xmm6, xmm6 - movdqa xmm7, xmm6 - punpcklwd xmm6, xmm6 - punpckhwd xmm7, xmm7 mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb + movd xmm6, [esp + 12] // dither4 mov ecx, [esp + 16] // pix + punpcklbw xmm6, xmm6 // make dither 16 bytes + movdqa xmm7, xmm6 + punpcklwd xmm6, xmm6 + punpckhwd xmm7, xmm7 pcmpeqb xmm3, xmm3 // generate mask 0x0000001f psrld xmm3, 27 pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 @@ -648,7 +647,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, convertloop: movdqu xmm0, [eax] // fetch 4 pixels of argb 
- paddusb xmm0, xmm6 + paddusb xmm0, xmm6 // add dither movdqa xmm1, xmm0 // B movdqa xmm2, xmm0 // G pslld xmm0, 8 // R @@ -661,68 +660,46 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, por xmm1, xmm2 // BG por xmm0, xmm1 // BGR packssdw xmm0, xmm0 + lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 - - movdqu xmm0, [eax + 16] // fetch 4 pixels of argb - paddusb xmm0, xmm7 - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm1, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm1, xmm3 // B - pand xmm2, xmm4 // G - pand xmm0, xmm5 // R - por xmm1, xmm2 // BG - por xmm0, xmm1 // BGR - packssdw xmm0, xmm0 - movq qword ptr [edx + 8], xmm0 // store 4 pixels of RGB565 - - lea eax, [eax + 32] - lea edx, [edx + 16] - sub ecx, 8 + lea edx, [edx + 8] + sub ecx, 4 jg convertloop ret } } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 -// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16 __declspec(naked) __declspec(align(16)) void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint8* dither8, int pix) { + const uint32 dither4, int pix) { __asm { - mov eax, [esp + 12] // dither8 - vmovq xmm6, qword ptr [eax] // fetch 8 dither values - vpunpcklbw xmm6, xmm6, xmm6 - vpermq ymm6, ymm6, 0xd8 - vpunpcklwd ymm6, ymm6, ymm6 - mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb + vbroadcastss xmm6, [esp + 12] // dither4 mov ecx, [esp + 16] // pix + vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes + vpermq ymm6, ymm6, 0xd8 + vpunpcklwd ymm6, ymm6, ymm6 vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpsrld ymm3, ymm3, 27 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800 - vpslld ymm5, ymm5, 11 + vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb - vpaddusb ymm0, ymm0, ymm6 + vpaddusb ymm0, ymm0, 
ymm6 // add dither vpsrld ymm2, ymm0, 5 // G vpsrld ymm1, ymm0, 3 // B - vpslld ymm0, ymm0, 8 // R + vpsrld ymm0, ymm0, 8 // R vpand ymm2, ymm2, ymm4 // G vpand ymm1, ymm1, ymm3 // B - vpsrad ymm0, ymm0, 16 // R vpand ymm0, ymm0, ymm5 // R vpor ymm1, ymm1, ymm2 // BG vpor ymm0, ymm0, ymm1 // BGR - vpackssdw ymm0, ymm0, ymm0 + vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of RGB565 @@ -807,7 +784,6 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { } #ifdef HAS_ARGBTORGB565ROW_AVX2 -// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16 __declspec(naked) __declspec(align(16)) void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { __asm { @@ -819,21 +795,19 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800 - vpslld ymm5, ymm5, 11 + vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb vpsrld ymm2, ymm0, 5 // G vpsrld ymm1, ymm0, 3 // B - vpslld ymm0, ymm0, 8 // R + vpsrld ymm0, ymm0, 8 // R vpand ymm2, ymm2, ymm4 // G vpand ymm1, ymm1, ymm3 // B - vpsrad ymm0, ymm0, 16 // R vpand ymm0, ymm0, ymm5 // R vpor ymm1, ymm1, ymm2 // BG vpor ymm0, ymm0, ymm1 // BGR - vpackssdw ymm0, ymm0, ymm0 + vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of RGB565 diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 36c8bb936..1c0c68422 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1381,18 +1381,14 @@ TEST_F(libyuvTest, TestYToARGB) { } } -static const uint8 kNoDither8x8[64] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, +static const uint8 kNoDither4x4[16] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, }; -TEST_F(libyuvTest, TestDither) { +TEST_F(libyuvTest, TestNoDither) { align_buffer_64(src_argb, benchmark_width_ * benchmark_height_ * 4); align_buffer_64(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); align_buffer_64(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); @@ -1404,7 +1400,7 @@ TEST_F(libyuvTest, TestDither) { benchmark_width_, benchmark_height_); ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, dst_rgb565dither, benchmark_width_ * 2, - kNoDither8x8, benchmark_width_, benchmark_height_); + kNoDither4x4, benchmark_width_, benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); } @@ -1414,4 +1410,46 @@ TEST_F(libyuvTest, TestDither) { free_aligned_buffer_64(dst_rgb565dither); } +// Ordered 4x4 dither for 888 to 565. Values from 0 to 7. 
+static const uint8 kDither565_4x4[16] = { + 0, 4, 1, 5, + 6, 2, 7, 3, + 1, 5, 0, 4, + 7, 3, 6, 2, +}; + +TEST_F(libyuvTest, TestDither) { + align_buffer_64(src_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_64(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + align_buffer_64(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + align_buffer_64(dst_argb, benchmark_width_ * benchmark_height_ * 4); + align_buffer_64(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(src_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); + MemRandomize(dst_argb, benchmark_width_ * benchmark_height_ * 4); + MemRandomize(dst_argbdither, benchmark_width_ * benchmark_height_ * 4); + ARGBToRGB565(src_argb, benchmark_width_ * 4, + dst_rgb565, benchmark_width_ * 2, + benchmark_width_, benchmark_height_); + ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, + dst_rgb565dither, benchmark_width_ * 2, + kDither565_4x4, benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565, benchmark_width_ * 2, + dst_argb, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + RGB565ToARGB(dst_rgb565dither, benchmark_width_ * 2, + dst_argbdither, benchmark_width_ * 4, + benchmark_width_, benchmark_height_); + + for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { + EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); + } + free_aligned_buffer_64(src_argb); + free_aligned_buffer_64(dst_rgb565); + free_aligned_buffer_64(dst_rgb565dither); + free_aligned_buffer_64(dst_argb); + free_aligned_buffer_64(dst_argbdither); +} + } // namespace libyuv