diff --git a/libyuv.gyp b/libyuv.gyp
index 3824a8008..44a6e116d 100644
--- a/libyuv.gyp
+++ b/libyuv.gyp
@@ -42,11 +42,21 @@
         'source/format_conversion.cc',
         'source/general.cc',
         'source/planar_functions.cc',
-        'source/row_posix.cc',
         'source/row_table.cc',
         'source/scale.cc',
         'source/video_common.cc',
       ],
+      'conditions': [  # exactly one row_*.cc is added to 'sources', chosen by OS
+        ['OS=="win"', {
+          'sources': [
+            'source/row_win.cc',  # written with MSVC __declspec(naked) / __asm syntax
+          ],
+        },{ # else
+          'sources': [
+            'source/row_posix.cc',
+          ],
+        }],
+      ]
     },
   ], # targets
 }
diff --git a/source/row_win.cc b/source/row_win.cc
new file mode 100644
index 000000000..e26cc9934
--- /dev/null
+++ b/source/row_win.cc
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "row.h"  // presumably declares uint8 and _kCoefficientsRgbY - confirm
+
+#define kCoefficientsRgbY _kCoefficientsRgbY + 0  // table base; the asm below indexes it as [base + 8 * sample]
+#define kCoefficientsRgbU _kCoefficientsRgbY + 2048  // 2048 = 256 * 8; presumably the U entries that follow 256 Y entries - confirm against the table definition
+#define kCoefficientsRgbV _kCoefficientsRgbY + 4096  // 4096 = 2 * 2048; presumably the V entries - confirm against the table definition
+
+extern "C" {
+
+__declspec(naked)  // no compiler prologue/epilogue: the asm reads its arguments from the stack and issues its own ret
+void FastConvertYUVToRGB32Row(const uint8* y_buf,  // 2 bytes read per loop pass
+                              const uint8* u_buf,  // 1 byte read per loop pass (shared by both pixels)
+                              const uint8* v_buf,  // 1 byte read per loop pass (shared by both pixels)
+                              uint8* rgb_buf,  // 8 bytes (2 pixels x 4 bytes) written per loop pass
+                              int width) {  // pixel count. NOTE(review): the count is tested after the body and steps by 2, so width <= 0 still runs one pass and an odd width processes one extra pixel - confirm callers only pass positive even widths
+  __asm {
+    pushad  // saves all eight general registers (32 bytes); ebx/esi/edi/ebp are reused below
+    mov edx, [esp + 32 + 4]  // y_buf (32 = pushad area, 4 = return address)
+    mov edi, [esp + 32 + 8]  // u_buf
+    mov esi, [esp + 32 + 12]  // v_buf
+    mov ebp, [esp + 32 + 16]  // rgb_buf
+    mov ecx, [esp + 32 + 20]  // width
+
+ convertloop :
+    movzx eax, byte ptr [edi]  // eax = U sample
+    lea edi, [edi + 1]  // u_buf += 1
+    movzx ebx, byte ptr [esi]  // ebx = V sample
+    lea esi, [esi + 1]  // v_buf += 1
+    movq mm0, [kCoefficientsRgbU + 8 * eax]  // mm0 = 8-byte table entry for U (handled as 4 signed words below)
+    movzx eax, byte ptr [edx]  // eax = first Y sample
+    paddsw mm0, [kCoefficientsRgbV + 8 * ebx]  // mm0 += V entry, per word with signed saturation
+    movzx ebx, byte ptr [edx + 1]  // ebx = second Y sample
+    movq mm1, [kCoefficientsRgbY + 8 * eax]  // mm1 = Y entry for pixel 0
+    lea edx, [edx + 2]  // y_buf += 2
+    movq mm2, [kCoefficientsRgbY + 8 * ebx]  // mm2 = Y entry for pixel 1
+    paddsw mm1, mm0  // pixel 0 = Y entry + (U + V)
+    paddsw mm2, mm0  // pixel 1 = Y entry + (U + V)
+    psraw mm1, 6  // arithmetic shift right by 6 on each word
+    psraw mm2, 6  // same scaling for pixel 1
+    packuswb mm1, mm2  // saturate words to unsigned bytes: low 4 bytes = pixel 0, high 4 bytes = pixel 1
+    movntq [ebp], mm1  // non-temporal 8-byte store. NOTE(review): no sfence is issued in this function
+    lea ebp, [ebp + 8]  // rgb_buf += 8
+    sub ecx, 2  // two pixels consumed
+    ja convertloop  // unsigned test: repeat while the subtraction neither borrowed nor reached zero
+
+    popad  // restore the registers saved by pushad
+    ret  // NOTE(review): no emms is executed here; MMX state is left as-is - confirm the caller clears it
+  }
+}
+
+__declspec(naked)  // no compiler prologue/epilogue: the asm reads its arguments from the stack and issues its own ret
+void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // 1 byte read per loop pass
+                                 const uint8* u_buf,  // 1 byte read per loop pass
+                                 const uint8* v_buf,  // 1 byte read per loop pass
+                                 uint8* rgb_buf,  // 4 bytes (1 pixel) written per loop pass
+                                 int width) {  // pixel count. NOTE(review): the count is tested after the body, so width <= 0 still runs one pass - confirm callers pass width >= 1
+  __asm {
+    pushad  // saves all eight general registers (32 bytes); ebx/esi/edi/ebp are reused below
+    mov edx, [esp + 32 + 4]  // y_buf (32 = pushad area, 4 = return address)
+    mov edi, [esp + 32 + 8]  // u_buf
+    mov esi, [esp + 32 + 12]  // v_buf
+    mov ebp, [esp + 32 + 16]  // rgb_buf
+    mov ecx, [esp + 32 + 20]  // width
+
+ convertloop :
+    movzx eax, byte ptr [edi]  // eax = U sample
+    lea edi, [edi + 1]  // u_buf += 1
+    movzx ebx, byte ptr [esi]  // ebx = V sample
+    lea esi, [esi + 1]  // v_buf += 1
+    movq mm0, [kCoefficientsRgbU + 8 * eax]  // mm0 = 8-byte table entry for U (handled as 4 signed words below)
+    movzx eax, byte ptr [edx]  // eax = Y sample
+    paddsw mm0, [kCoefficientsRgbV + 8 * ebx]  // mm0 += V entry, per word with signed saturation
+    lea edx, [edx + 1]  // y_buf += 1
+    paddsw mm0, [kCoefficientsRgbY + 8 * eax]  // mm0 += Y entry, per word with signed saturation
+    psraw mm0, 6  // arithmetic shift right by 6 on each word
+    packuswb mm0, mm0  // saturate words to unsigned bytes; the pixel ends up in the low 4 bytes
+    movd [ebp], mm0  // ordinary 4-byte store of the low dword
+    lea ebp, [ebp + 4]  // rgb_buf += 4
+    sub ecx, 1  // one pixel consumed
+    ja convertloop  // unsigned test: repeat while the subtraction neither borrowed nor reached zero
+
+    popad  // restore the registers saved by pushad
+    ret  // NOTE(review): no emms is executed here; MMX state is left as-is - confirm the caller clears it
+  }
+}
+
+__declspec(naked)  // no compiler prologue/epilogue: the asm reads its arguments from the stack and issues its own ret
+void FastConvertYToRGB32Row(const uint8* y_buf,  // 2 bytes read per loop pass; no U/V input is used
+                            uint8* rgb_buf,  // 8 bytes (2 pixels x 4 bytes) written per loop pass
+                            int width) {  // pixel count. NOTE(review): tested after the body in steps of 2, so width <= 0 still runs one pass and an odd width processes one extra pixel - confirm callers
+  __asm {
+    push ebx  // only ebx is saved here, so arguments sit at esp + 4 + N
+    mov eax, [esp + 4 + 4]  // y_buf (first 4 = saved ebx, second 4 = return address)
+    mov edx, [esp + 4 + 8]  // rgb_buf
+    mov ecx, [esp + 4 + 12]  // width
+
+ convertloop :
+    movzx ebx, byte ptr [eax]  // ebx = first Y sample
+    movq mm0, [kCoefficientsRgbY + 8 * ebx]  // mm0 = Y entry for pixel 0
+    psraw mm0, 6  // arithmetic shift right by 6 on each word
+    movzx ebx, byte ptr [eax + 1]  // ebx = second Y sample
+    movq mm1, [kCoefficientsRgbY + 8 * ebx]  // mm1 = Y entry for pixel 1
+    psraw mm1, 6  // same scaling for pixel 1
+    packuswb mm0, mm1  // saturate words to unsigned bytes: low 4 bytes = pixel 0, high 4 bytes = pixel 1
+    lea eax, [eax + 2]  // y_buf += 2
+    movq [edx], mm0  // ordinary (temporal) 8-byte store, unlike the movntq in FastConvertYUVToRGB32Row
+    lea edx, [edx + 8]  // rgb_buf += 8
+    sub ecx, 2  // two pixels consumed
+    ja convertloop  // unsigned test: repeat while the subtraction neither borrowed nor reached zero
+
+    pop ebx  // restore the register saved on entry
+    ret  // NOTE(review): no emms is executed here; MMX state is left as-is - confirm the caller clears it
+  }
+}
+
+}  // extern "C"