diff --git a/README.chromium b/README.chromium index b2c02eadc..e1cde060d 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 790 +Version: 791 License: BSD License File: LICENSE diff --git a/include/libyuv/row.h b/include/libyuv/row.h index d453522d4..5ab299a73 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -136,15 +136,16 @@ extern "C" { #define HAS_YTOARGBROW_SSE2 #define HAS_YUY2TOARGBROW_SSSE3 #define HAS_YUY2TOYROW_SSE2 + +// Effects: +#define HAS_ARGBCOLORTABLEROW_X86 +#define HAS_RGBCOLORTABLEROW_X86 #endif // The following are Windows only: // TODO(fbarchard): Port to gcc. #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) // Effects: -// Disabled due to C versions being faster on Sandy Bridge with VC2012. -// #define HAS_ARGBCOLORTABLEROW_X86 -// #define HAS_RGBCOLORTABLEROW_X86 // SSSE3 version incomplete: // #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 12655dc69..3b5f932d0 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 790 +#define LIBYUV_VERSION 791 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/source/row_posix.cc b/source/row_posix.cc index 11d41bb01..9a1300f22 100644 --- a/source/row_posix.cc +++ b/source/row_posix.cc @@ -5875,6 +5875,66 @@ void ARGBPolynomialRow_SSE2(const uint8* src_argb, } #endif // HAS_ARGBPOLYNOMIALROW_SSE2 +#ifdef HAS_ARGBCOLORTABLEROW_X86 +// Transform ARGB pixels in place: byte c becomes table_argb[value * 4 + c]. +void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, + int width) { + uintptr_t pixel_temp = 0u; + asm volatile ( + // 1 pixel loop. 
+ ".p2align 4 \n" + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb (%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x1(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x2(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "movzb -0x1(%0),%1 \n" + "movzb 0x3(%3,%1,4),%1 \n" + "mov %b1,-0x1(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "+d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); +} +#endif // HAS_ARGBCOLORTABLEROW_X86 + +#ifdef HAS_RGBCOLORTABLEROW_X86 +// Transform the first 3 bytes of each pixel with color table; 4th is kept. +void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { + uintptr_t pixel_temp = 0u; + asm volatile ( + // 1 pixel loop. + ".p2align 4 \n" + "1: \n" + "movzb (%0),%1 \n" + "lea 0x4(%0),%0 \n" + "movzb (%3,%1,4),%1 \n" + "mov %b1,-0x4(%0) \n" + "movzb -0x3(%0),%1 \n" + "movzb 0x1(%3,%1,4),%1 \n" + "mov %b1,-0x3(%0) \n" + "movzb -0x2(%0),%1 \n" + "movzb 0x2(%3,%1,4),%1 \n" + "mov %b1,-0x2(%0) \n" + "dec %2 \n" + "jg 1b \n" + : "+r"(dst_argb), // %0 + "+d"(pixel_temp), // %1 + "+r"(width) // %2 + : "r"(table_argb) // %3 + : "memory", "cc"); +} +#endif // HAS_RGBCOLORTABLEROW_X86 + #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus diff --git a/source/row_win.cc b/source/row_win.cc index 273c35066..0e1af3a88 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -5097,43 +5097,28 @@ __declspec(naked) __declspec(align(16)) void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { __asm { - push ebx push esi - push edi - push ebp - mov eax, [esp + 16 + 4] /* dst_argb */ - mov edi, [esp + 16 + 8] /* table_argb */ - mov ecx, [esp + 16 + 12] /* width */ - xor ebx, ebx - xor edx, edx + mov eax, [esp + 4 + 4] /* dst_argb */ + mov esi, [esp + 4 + 8] /* table_argb */ + mov ecx, [esp + 4 + 12] /* width */ - align 16 - convertloop: - mov ebp, dword ptr [eax] // BGRA - mov esi, ebp - and ebp, 255 - shr 
esi, 8 - and esi, 255 - mov bl, [edi + ebp * 4 + 0] // B - mov dl, [edi + esi * 4 + 1] // G - mov ebp, dword ptr [eax] // BGRA - mov esi, ebp - shr ebp, 16 - shr esi, 24 - and ebp, 255 - mov [eax], bl - mov [eax + 1], dl - mov bl, [edi + ebp * 4 + 2] // R - mov dl, [edi + esi * 4 + 3] // A - mov [eax + 2], bl - mov [eax + 3], dl + convertloop: + movzx edx, byte ptr [eax] lea eax, [eax + 4] - sub ecx, 1 + movzx edx, byte ptr [esi + edx * 4] + mov byte ptr [eax - 4], dl + movzx edx, byte ptr [eax - 4 + 1] + movzx edx, byte ptr [esi + edx * 4 + 1] + mov byte ptr [eax - 4 + 1], dl + movzx edx, byte ptr [eax - 4 + 2] + movzx edx, byte ptr [esi + edx * 4 + 2] + mov byte ptr [eax - 4 + 2], dl + movzx edx, byte ptr [eax - 4 + 3] + movzx edx, byte ptr [esi + edx * 4 + 3] + mov byte ptr [eax - 4 + 3], dl + dec ecx jg convertloop - pop ebp - pop edi pop esi - pop ebx ret } } @@ -5144,39 +5129,26 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, __declspec(naked) __declspec(align(16)) void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { __asm { - push ebx push esi - push edi - push ebp - mov eax, [esp + 16 + 4] /* dst_argb */ - mov edi, [esp + 16 + 8] /* table_argb */ - mov ecx, [esp + 16 + 12] /* width */ - xor ebx, ebx - xor edx, edx + mov eax, [esp + 4 + 4] /* dst_argb */ + mov esi, [esp + 4 + 8] /* table_argb */ + mov ecx, [esp + 4 + 12] /* width */ - align 16 - convertloop: - mov ebp, dword ptr [eax] // BGRA - mov esi, ebp - and ebp, 255 - shr esi, 8 - and esi, 255 - mov bl, [edi + ebp * 4 + 0] // B - mov dl, [edi + esi * 4 + 1] // G - mov ebp, dword ptr [eax] // BGRA - shr ebp, 16 - and ebp, 255 - mov [eax], bl - mov [eax + 1], dl - mov bl, [edi + ebp * 4 + 2] // R - mov [eax + 2], bl + convertloop: + movzx edx, byte ptr [eax] lea eax, [eax + 4] - sub ecx, 1 + movzx edx, byte ptr [esi + edx * 4] + mov byte ptr [eax - 4], dl + movzx edx, byte ptr [eax - 4 + 1] + movzx edx, byte ptr [esi + edx * 4 + 1] + mov byte ptr [eax - 
4 + 1], dl + movzx edx, byte ptr [eax - 4 + 2] + movzx edx, byte ptr [esi + edx * 4 + 2] + mov byte ptr [eax - 4 + 2], dl + dec ecx jg convertloop - pop ebp - pop edi + pop esi - pop ebx ret } } diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index bdf6b6570..e67c87fb6 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -543,7 +543,7 @@ TEST_F(libyuvTest, TestARGBColorTable) { memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. - static const uint8 kARGBTable[1280 * 4] = { + static const uint8 kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, @@ -602,7 +602,7 @@ TEST_F(libyuvTest, TestRGBColorTable) { memset(orig_pixels, 0, sizeof(orig_pixels)); // Matrix for Sepia. - static const uint8 kARGBTable[1280 * 4] = { + static const uint8 kARGBTable[256 * 4] = { 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u,