From c4e032c543c9b3e8a9edcfcb3e5bdc3ceb6800f2 Mon Sep 17 00:00:00 2001 From: "fbarchard@google.com" Date: Wed, 4 Feb 2015 19:45:26 +0000 Subject: [PATCH] change Y multiplier and bias to compensate for 257/256 which makes YToARGB exactly match float math. Histogram Before hist -3 -2 -1 0 1 2 3 red 0 0 1809408 13140736 1827072 0 0 green 0 0 1679912 13471329 1625975 0 0 blue 168448 994816 1876480 10655488 1893376 1006336 182272 Histogram After hist -3 -2 -1 0 1 2 3 red 0 0 558848 15632128 586240 0 0 green 0 0 209907 16350588 216721 0 0 blue 14848 642816 1989376 11363328 2053120 695040 18688 BUG=394 TESTED=more stringent luma tests R=brucedawson@google.com Review URL: https://webrtc-codereview.appspot.com/38859004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1259 16f28f9a-4ce2-e073-06de-1de4eb20be90 --- README.chromium | 2 +- include/libyuv/version.h | 2 +- source/row_common.cc | 5 +++-- source/row_posix.cc | 16 ++++++++-------- source/row_win.cc | 28 ++++++++++++++-------------- unit_test/color_test.cc | 12 ++++++------ unit_test/convert_test.cc | 2 +- 7 files changed, 34 insertions(+), 33 deletions(-) diff --git a/README.chromium b/README.chromium index dbf6f522f..c77286776 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1258 +Version: 1259 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 7a05b94a1..d0b28eb72 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1258 +#define LIBYUV_VERSION 1259 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_common.cc b/source/row_common.cc index 778149f58..16327145b 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -962,8 +962,9 @@ void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { // YUV to RGB conversion 
constants. // Y contribution to R,G,B. Scale and bias. -#define YG 19071 /* round(1.164 * 64 * 256) */ -#define YGB 1197 /* 1.164 * 64 * 16 - adjusted for even error distribution */ +// TODO(fbarchard): Consider moving constants into a common header. +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ // U and V contributions to R,G,B. #define UB -128 /* -min(128, round(2.018 * 64)) */ diff --git a/source/row_posix.cc b/source/row_posix.cc index 6f30dbbfb..3cec55027 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -1522,8 +1522,8 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, // YUV to RGB conversion constants. // Y contribution to R,G,B. Scale and bias. -#define YG 19071 /* round(1.164 * 64 * 256) */ -#define YGB 1197 /* 1.164 * 64 * 16 - adjusted for even error distribution */ +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ // U and V contributions to R,G,B. #define UB -128 /* -min(128, round(2.018 * 64)) */ @@ -2296,14 +2296,14 @@ void YToARGBRow_SSE2(const uint8* y_buf, uint8* dst_argb, int width) { asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "mov $0x04ad04ad,%%eax \n" // 04ad = 1197 = 1.164 * 16 - "movd %%eax,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "mov $0x4a7f4a7f,%%eax \n" // 4a7f = 19071 = 1.164 + "mov $0x4a354a35,%%eax \n" // 4a35 = 18997 = 1.164 "movd %%eax,%%xmm2 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n" + "mov $0x04880488,%%eax \n" // 0488 = 1160 = 1.164 * 16 + "movd %%eax,%%xmm3 \n" + "pshufd $0x0,%%xmm3,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "pslld $0x18,%%xmm4 \n" LABELALIGN "1: \n" // Step 1: Scale Y contribution to 8 G values. 
G = (y - 16) * 1.164 diff --git a/source/row_win.cc b/source/row_win.cc index a906968f9..0a89e28c1 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -26,8 +26,8 @@ extern "C" { // YUV to RGB conversion constants. // Y contribution to R,G,B. Scale and bias. -#define YG 19071 /* round(1.164 * 64 * 256) */ -#define YGB 1197 /* 1.164 * 64 * 16 - adjusted for even error distribution */ +#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ +#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ // U and V contributions to R,G,B. #define UB -128 /* -min(128, round(2.018 * 64)) */ @@ -2306,14 +2306,14 @@ void YToARGBRow_SSE2(const uint8* y_buf, uint8* rgb_buf, int width) { __asm { - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 - pslld xmm4, 24 - mov eax, 0x04ad04ad // 04ad = 1197 = round(1.164 * 64 * 16) - movd xmm3, eax - pshufd xmm3, xmm3, 0 - mov eax, 0x4a7f4a7f // 4a7f = 19071 = round(1.164 * 64 * 256) + mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256 * 256 / 257) movd xmm2, eax pshufd xmm2, xmm2,0 + mov eax, 0x04880488 // 0488 = 1160 = 1.164 * 64 * 16, adjusted for even error distribution + movd xmm3, eax + pshufd xmm3, xmm3, 0 + pcmpeqb xmm4, xmm4 // generate mask 0xff000000 + pslld xmm4, 24 mov eax, [esp + 4] // Y mov edx, [esp + 8] // rgb @@ -2348,6 +2348,7 @@ void YToARGBRow_SSE2(const uint8* y_buf, #ifdef HAS_YTOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). +// note: vpunpcklbw mutates and vpackuswb unmutates. 
__declspec(naked) __declspec(align(16)) void YToARGBRow_AVX2(const uint8* y_buf, uint8* rgb_buf, @@ -2355,10 +2356,10 @@ void YToARGBRow_AVX2(const uint8* y_buf, __asm { vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xff000000 vpslld ymm4, ymm4, 24 - mov eax, 0x04ad04ad // 04ad = 1197 = round(1.164 * 64 * 16) + mov eax, 0x04880488 // 0488 = 1160 = 1.164 * 64 * 16, adjusted for even error distribution vmovd xmm3, eax vbroadcastss ymm3, xmm3 - mov eax, 0x4a7f4a7f // 4a7f = 19071 = round(1.164 * 64 * 256) + mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256 * 256 / 257) vmovd xmm2, eax vbroadcastss ymm2, xmm2 @@ -2370,7 +2371,7 @@ void YToARGBRow_AVX2(const uint8* y_buf, // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164 vmovdqu xmm0, [eax] lea eax, [eax + 16] - vpermq ymm0, ymm0, 0xd8 + vpermq ymm0, ymm0, 0xd8 // vpunpcklbw mutates vpunpcklbw ymm0, ymm0, ymm0 // Y.Y vpmulhuw ymm0, ymm0, ymm2 vpsubusw ymm0, ymm0, ymm3 @@ -2381,8 +2382,8 @@ void YToARGBRow_AVX2(const uint8* y_buf, // Step 2: Weave into ARGB vpunpcklbw ymm1, ymm0, ymm0 // GG - mutates vpermq ymm1, ymm1, 0xd8 - vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 4 pixels - vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 4 pixels + vpunpcklwd ymm0, ymm1, ymm1 // GGGG first 8 pixels + vpunpckhwd ymm1, ymm1, ymm1 // GGGG next 8 pixels vpor ymm0, ymm0, ymm4 vpor ymm1, ymm1, ymm4 vmovdqu [edx], ymm0 @@ -2396,7 +2397,6 @@ void YToARGBRow_AVX2(const uint8* y_buf, } #endif // HAS_YTOARGBROW_AVX2 - #ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. 
static const uvec8 kShuffleMirror = { diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc index 3baf33935..1fc4bd56b 100644 --- a/unit_test/color_test.cc +++ b/unit_test/color_test.cc @@ -227,7 +227,7 @@ TEST_F(libyuvTest, TestYUV) { EXPECT_EQ(255, b0); YUVToRGB(240, 255, 0, &r1, &g1, &b1); - EXPECT_NEAR(56, r1, 1); + EXPECT_EQ(57, r1); EXPECT_EQ(255, g1); EXPECT_EQ(255, b1); @@ -238,9 +238,9 @@ TEST_F(libyuvTest, TestYUV) { EXPECT_EQ(2, b0); YUVToRGB(240, 0, 0, &r1, &g1, &b1); - EXPECT_NEAR(56, r1, 1); + EXPECT_EQ(57, r1); EXPECT_EQ(255, g1); - EXPECT_NEAR(6, b1, 1); + EXPECT_EQ(5, b1); for (int i = 0; i < 256; ++i) { YUVToRGBReference(i, 128, 128, &r0, &g0, &b0); @@ -281,9 +281,9 @@ TEST_F(libyuvTest, TestGreyYUV) { YUVToRGBReference(y, 128, 128, &r0, &g0, &b0); YUVToRGB(y, 128, 128, &r1, &g1, &b1); YToRGB(y, &r2, &g2, &b2); - EXPECT_NEAR(r0, r1, ERROR_R); - EXPECT_NEAR(g0, g1, ERROR_G); - EXPECT_NEAR(b0, b1, ERROR_B); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); EXPECT_EQ(r1, r2); EXPECT_EQ(g1, g2); EXPECT_EQ(b1, b2); diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index 37eb97dbc..68ea0f64e 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1262,7 +1262,7 @@ TEST_F(libyuvTest, TestYToARGB) { argb[i * 4 + 3]); } for (int i = 0; i < 32; ++i) { - EXPECT_NEAR(expectedg[i], argb[i * 4 + 0], 1); + EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); } }