libyuv/source/convert.cc
mikhal@webrtc.org 22f9cfb688 libyuv: Clean up
Review URL: http://webrtc-codereview.appspot.com/220001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@14 16f28f9a-4ce2-e073-06de-1de4eb20be90
2011-10-10 20:11:26 +00:00

1468 lines
49 KiB
C++

/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "convert.h"
#include <string.h> // memcpy(), memset()
#include <assert.h>
#include <stdlib.h> // abs
//#define SCALEOPT //Currently for windows only. June 2010
#ifdef SCALEOPT
#include <emmintrin.h>
#endif
#include "conversion_tables.h"
namespace libyuv
{
// Clip value to [0,255]
inline uint8 Clip(int32 val);
#ifdef SCALEOPT
void *memcpy_16(void * dest, const void * src, size_t n);
void *memcpy_8(void * dest, const void * src, size_t n);
#endif
int
ConvertI420ToRGB24(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + dst_stride * src_height * 3 - dst_stride * 3;
uint8* out2 = out - dst_stride * 3;
int h, w;
int tmpR, tmpG, tmpB;
const uint8 *y1, *y2 ,*u, *v;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[2] = Clip(tmpR);
out[1] = Clip(tmpG);
out[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[2] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[5] = Clip(tmpR);
out[4] = Clip(tmpG);
out[3] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[5] = Clip(tmpR);
out2[4] = Clip(tmpG);
out2[3] = Clip(tmpB);
out += 6;
out2 += 6;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += src_ystride + src_ystride - src_width;
y2 += src_ystride + src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= dst_stride * 9;
out2 -= dst_stride * 9;
} // end height for
return 0;
}
int
ConvertI420ToARGB(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
uint8* out1 = dst_frame;
uint8* out2 = out1 + dst_stride * 4;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = src_yplane + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
int tmpR, tmpG, tmpB;
for (h = (src_height >> 1); h > 0; h--)
{
// Do 2 rows at the time
for (w = 0; w < (src_width >> 1); w++)
{ // Vertical and horizontal sub-sampling
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out1[3] = 0xff;
out1[2] = Clip(tmpR);
out1[1] = Clip(tmpG);
out1[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[3] = 0xff;
out2[2] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out1[7] = 0xff;
out1[6] = Clip(tmpR);
out1[5] = Clip(tmpG);
out1[4] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[7] = 0xff;
out2[6] = Clip(tmpR);
out2[5] = Clip(tmpG);
out2[4] = Clip(tmpB);
out1 += 8;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out1 += (2 * dst_stride - src_width) * 4;
out2 += (2 * dst_stride - src_width) * 4;
} // end height for
return 0;
}
int
ConvertYV12ToRGBA(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
int32 diff = dst_stride - src_width;
uint8 * out = dst_frame;
uint8 * out2 = out + dst_stride * 4;
const uint8 *y1,*y2, *u, *v;
int tmpG, tmpB, tmpR;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
v = src_vplane;
u = src_uplane;
for (h = (src_height >> 1); h > 0; h--)
{
// Do 2 rows at the time
for (w = 0; w < (src_width >> 1); w++)
{
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128 ) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128 ) >> 8);
out[1] = Clip(tmpR);
out[2] = Clip(tmpG);
out[3] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = Clip(tmpR);
out2[2] = Clip(tmpG);
out2[3] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[5] = Clip(tmpR);
out[6] = Clip(tmpG);
out[7] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[5] = Clip(tmpR);
out2[6] = Clip(tmpG);
out2[7] = Clip(tmpB);
out[0] = 0xff;
out[4] = 0xff;
out += 8;
out2[0] = 0xff;
out2[4] = 0xff;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out += (src_width + diff * 2) * 4;
out2 += (src_width + diff * 2) * 4;
}
return 0;
}
// Little Endian...
int
ConvertI420ToARGB4444(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + dst_stride * (src_height - 1) * 2;
uint8* out2 = out - (2 * dst_stride);
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// Convert to RGB888 and re-scale to 4 bits
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] =(uint8)((Clip(tmpG) & 0xf0) + (Clip(tmpB) >> 4));
out[1] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out2[1] = (uint8) (0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[2] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out[3] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[2] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out2[3] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
out += 4;
out2 += 4;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= (2 * dst_stride + src_width) * 2;
out2 -= (2 * dst_stride + src_width) * 2;
} // end height for
return 0;
}
int
ConvertI420ToRGB565(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1);
uint16* out2 = out - dst_stride;
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB ) >> 3);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= 2 * dst_stride + src_width;
out2 -= 2 * dst_stride + src_width;
} // end height for
return 0;
}
//Same as ConvertI420ToRGB565 but doesn't flip vertically.
int
ConvertI420ToRGB565Android(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
uint16* out = (uint16*)(dst_frame);
uint16* out2 = out + dst_stride;
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB ) >> 3);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out += 2 * dst_stride - src_width;
out2 += 2 * dst_stride - src_width;
} // end height for
return 0;
}
int
ConvertI420ToARGB1555(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1);
uint16* out2 = out - dst_stride ;
int32 tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{ // 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 555
// 3. Add 1 for alpha value
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= 2 * dst_stride + src_width;
out2 -= 2 * dst_stride + src_width;
} // end height for
return 0;
}
int
ConvertI420ToYUY2(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
const uint8* in1 = src_yplane;
const uint8* in2 = src_yplane + src_ystride ;
const uint8* inU = src_uplane;
const uint8* inV = src_vplane;
uint8* out1 = dst_frame;
uint8* out2 = dst_frame + 2 * dst_stride;
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
#ifndef SCALEOPT
for (int i = 0; i < (src_height >> 1);i++)
{
for (int j = 0; j < (src_width >> 1);j++)
{
out1[0] = in1[0];
out1[1] = *inU;
out1[2] = in1[1];
out1[3] = *inV;
out2[0] = in2[0];
out2[1] = *inU;
out2[2] = in2[1];
out2[3] = *inV;
out1 += 4;
out2 += 4;
inU++;
inV++;
in1 += 2;
in2 += 2;
}
in1 += 2 * src_ystride - src_width;
in2 += 2 * src_ystride - src_width;
inU += src_ustride - (src_width >> 1);
inV += src_vstride - (src_width >> 1);
out1 += 2 * dst_stride + 2 * (dst_stride - src_width);
out2 += 2 * dst_stride + 2 * (dst_stride - src_width);
}
#else
for (WebRtc_UWord32 i = 0; i < (height >> 1);i++)
{
int32 width__ = (width >> 4);
_asm
{
;pusha
mov eax, DWORD PTR [in1] ;1939.33
mov ecx, DWORD PTR [in2] ;1939.33
mov ebx, DWORD PTR [inU] ;1939.33
mov edx, DWORD PTR [inV] ;1939.33
loop0:
movq xmm6, QWORD PTR [ebx] ;inU
movq xmm0, QWORD PTR [edx] ;inV
punpcklbw xmm6, xmm0 ;inU, inV mix
;movdqa xmm1, xmm6
;movdqa xmm2, xmm6
;movdqa xmm4, xmm6
movdqu xmm3, XMMWORD PTR [eax] ;in1
movdqa xmm1, xmm3
punpcklbw xmm1, xmm6 ;in1, inU, in1, inV
mov esi, DWORD PTR [out1]
movdqu XMMWORD PTR [esi], xmm1 ;write to out1
movdqu xmm5, XMMWORD PTR [ecx] ;in2
movdqa xmm2, xmm5
punpcklbw xmm2, xmm6 ;in2, inU, in2, inV
mov edi, DWORD PTR [out2]
movdqu XMMWORD PTR [edi], xmm2 ;write to out2
punpckhbw xmm3, xmm6 ;in1, inU, in1, inV again
movdqu XMMWORD PTR [esi+16], xmm3 ;write to out1 again
add esi, 32
mov DWORD PTR [out1], esi
punpckhbw xmm5, xmm6 ;inU, in2, inV again
movdqu XMMWORD PTR [edi+16], xmm5 ;write to out2 again
add edi, 32
mov DWORD PTR [out2], edi
add ebx, 8
add edx, 8
add eax, 16
add ecx, 16
mov esi, DWORD PTR [width__]
sub esi, 1
mov DWORD PTR [width__], esi
jg loop0
mov DWORD PTR [in1], eax ;1939.33
mov DWORD PTR [in2], ecx ;1939.33
mov DWORD PTR [inU], ebx ;1939.33
mov DWORD PTR [inV], edx ;1939.33
;popa
emms
}
in1 += 2 * src_ystride - src_width;
in2 += 2 * src_ystride - src_width;
out1 += 2 * strideOut + 2 * (strideOut - width);
out2 += 2 * strideOut + 2 * (strideOut - width);
}
#endif
return 0;
}
int
ConvertI420ToUYVY(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
int i = 0;
const uint8* y1 = src_yplane;
const uint8* y2 = y1 + src_ystride;
const uint8* u = src_uplane;
const uint8* v = src_vplane;
uint8* out1 = dst_frame;
uint8* out2 = dst_frame + 2 * dst_stride;
// Macro-pixel = 2 image pixels
// U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
#ifndef SCALEOPT
for (; i < (src_height >> 1);i++)
{
for (int j = 0; j < (src_width >> 1) ;j++)
{
out1[0] = *u;
out1[1] = y1[0];
out1[2] = *v;
out1[3] = y1[1];
out2[0] = *u;
out2[1] = y2[0];
out2[2] = *v;
out2[3] = y2[1];
out1 += 4;
out2 += 4;
u++;
v++;
y1 += 2;
y2 += 2;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out1 += 2 * (dst_stride + (dst_stride - src_width));
out2 += 2 * (dst_stride + (dst_stride - src_width));
}
#else
for (; i < (height >> 1);i++)
{
int32 width__ = (width >> 4);
_asm
{
;pusha
mov eax, DWORD PTR [in1] ;1939.33
mov ecx, DWORD PTR [in2] ;1939.33
mov ebx, DWORD PTR [inU] ;1939.33
mov edx, DWORD PTR [inV] ;1939.33
loop0:
movq xmm6, QWORD PTR [ebx] ;inU
movq xmm0, QWORD PTR [edx] ;inV
punpcklbw xmm6, xmm0 ;inU, inV mix
movdqa xmm1, xmm6
movdqa xmm2, xmm6
movdqa xmm4, xmm6
movdqu xmm3, XMMWORD PTR [eax] ;in1
punpcklbw xmm1, xmm3 ;inU, in1, inV
mov esi, DWORD PTR [out1]
movdqu XMMWORD PTR [esi], xmm1 ;write to out1
movdqu xmm5, XMMWORD PTR [ecx] ;in2
punpcklbw xmm2, xmm5 ;inU, in2, inV
mov edi, DWORD PTR [out2]
movdqu XMMWORD PTR [edi], xmm2 ;write to out2
punpckhbw xmm4, xmm3 ;inU, in1, inV again
movdqu XMMWORD PTR [esi+16], xmm4 ;write to out1 again
add esi, 32
mov DWORD PTR [out1], esi
punpckhbw xmm6, xmm5 ;inU, in2, inV again
movdqu XMMWORD PTR [edi+16], xmm6 ;write to out2 again
add edi, 32
mov DWORD PTR [out2], edi
add ebx, 8
add edx, 8
add eax, 16
add ecx, 16
mov esi, DWORD PTR [width__]
sub esi, 1
mov DWORD PTR [width__], esi
jg loop0
mov DWORD PTR [in1], eax ;1939.33
mov DWORD PTR [in2], ecx ;1939.33
mov DWORD PTR [inU], ebx ;1939.33
mov DWORD PTR [inV], edx ;1939.33
;popa
emms
}
in1 += width;
in2 += width;
out1 += 2 * (strideOut + (strideOut - width));
out2 += 2 * (strideOut + (strideOut - width));
}
#endif
return 0;
}
int
ConvertI420ToYV12(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
const uint8* inFrame = src_yplane;
uint8* outFrame = dst_frame;
// Copy Y
for (int i = 0; i < src_height; i++)
{
#ifndef SCALEOPT
memcpy(outFrame, inFrame, src_width);
#else
memcpy_16(outFrame, inFrame, src_width);
#endif
inFrame += src_ystride;
outFrame += dst_stride;
}
// Copy U
inFrame = src_uplane;
outFrame += (dst_stride >> 1) * src_height >> 1;
for (uint32 i = 0; i < src_height >>1; i++)
{
#ifndef SCALEOPT
memcpy(outFrame, inFrame, src_width >> 1);
#else
memcpy_8(outFrame, inFrame, src_width >> 1);
#endif
inFrame += src_ustride;
outFrame += dst_stride >> 1;
}
outFrame -= dst_stride * src_height >> 1;
// Copy V
for (uint32 i = 0; i < src_height >> 1; i++)
{
#ifndef SCALEOPT
memcpy(outFrame, inFrame, src_width >> 1);
#else
memcpy_8(outFrame, inFrame, width >> 1);
#endif
inFrame += src_vstride;
outFrame += dst_stride >> 1;
}
return 0;
}
int
ConvertNv12ToRGB565(const uint8* src_yplane, int src_ystride,
const uint8* src_uvplane, int src_uvstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_height < 1 || src_height < 1 || src_ystride < 1 || src_uvstride < 1)
{
return -1;
}
// Bi-Planar: Y plane followed by an interlaced U and V plane
const uint8* interlacedSrc = src_uvplane;
uint16* out = (uint16*)(src_yplane) + dst_stride * (src_height - 1);
uint16* out2 = out - dst_stride;
int32 tmpR, tmpG, tmpB;
const uint8 *y1,*y2;
y1 = src_yplane;
y2 = y1 + src_ystride;
int h, w;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
tmpR = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]]
+ 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
+ mapVcg[interlacedSrc[1]] + 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]]
+ 128) >> 8);
out[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]]
+ 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
+ mapVcg[interlacedSrc[1]] + 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]]
+ 128) >> 8);
out2[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]]
+ 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
+ mapVcg[interlacedSrc[1]] + 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]]
+ 128) >> 8);
out[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB ) >> 3);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]
+ 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
+ mapVcg[interlacedSrc[1]] + 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]]
+ 128) >> 8);
out2[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
interlacedSrc += 2;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
interlacedSrc += src_uvstride - (src_width >> 1);
out -= 3 * dst_stride + dst_stride - src_width;
out2 -= 3 * dst_stride + dst_stride - src_width;
} // end height for
return 0;
}
int
ConvertI420ToABGR(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + 4 * dst_stride * (src_height - 1);
uint8* out2 = out - dst_stride * 4;
int32 tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
tmpR = (int32)((298 * (y1[0] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y1[0] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128 ) >> 8);
tmpB = (int32)((298 * (y1[0] - 16) + 516 * (u[0] - 128)
+ 128 ) >> 8);
out[3] = 0xff;
out[0] = Clip(tmpR);
out[1] = Clip(tmpG);
out[2] = Clip(tmpB);
tmpR = (int32)((298 * (y2[0] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y2[0] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128) >> 8);
tmpB = (int32)((298 * (y2[0] - 16) + 516 * (u[0] - 128)
+ 128) >> 8);
out2[3] = 0xff;
out2[0] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[2] = Clip(tmpB);
tmpR = (int32)((298 * (y1[1] - 16) + 409 * (v[0] - 128)
+ 128 ) >> 8);
tmpG = (int32)((298 * (y1[1] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128 ) >> 8);
tmpB = (int32)((298 * (y1[1] - 16) + 516 * (u[0] - 128)
+ 128) >> 8);
out[7] = 0xff;
out[4] = Clip(tmpR);
out[5] = Clip(tmpG);
out[6] = Clip(tmpB);
tmpR = (int32)((298 * (y2[1] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y2[1] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128) >> 8);
tmpB = (int32)((298 * (y2[1] - 16) + 516 * (u[0] - 128)
+ 128 ) >> 8);
out2[7] = 0xff;
out2[4] = Clip(tmpR);
out2[5] = Clip(tmpG);
out2[6] = Clip(tmpB);
out += 8;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += src_ystride + src_ystride - src_width;
y2 += src_ystride + src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= (2 * dst_stride + src_width) * 4;
out2 -= (2 * dst_stride + src_width) * 4;
} // end height for
return 0;
}
int
ConvertUYVYToI420(const uint8* src_frame, int src_stride,
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
{
return -1;
}
int i, j;
uint8* outI = dst_yplane;
uint8* outU = dst_uplane;
uint8* outV = dst_vplane;
// U0Y0V0Y1..U2Y2V2Y3.....
for (i = 0; i < (src_height >> 1); i++)
{
for (j = 0; j < (src_width >> 1); j++)
{
outI[0] = src_frame[1];
*outU = src_frame[0];
outI[1] = src_frame[3];
*outV = src_frame[2];
src_frame += 4;
outI += 2;
outU++;
outV++;
}
for (j = 0; j < (src_width >> 1); j++)
{
outI[0] = src_frame[1];
outI[1] = src_frame[3];
src_frame += 4;
outI += 2;
}
outI += dst_ystride - src_width;
outU += dst_ustride - src_width << 1;
outV += dst_vstride - src_width << 1;
}
return 0;
}
int
ConvertRGB24ToARGB(const uint8* src_frame, int src_stride,
uint8* dst_frame, int dst_stride,
int src_width, int src_height
)
{
if (src_width < 1 || src_height < 1 || dst_stride < 1)
{
return -1;
}
int i, j, offset;
uint8* outFrame = dst_frame;
const uint8* inFrame = src_frame;
outFrame += dst_stride * (src_height - 1) * 4;
for (i = 0; i < src_height; i++)
{
for (j = 0; j < src_width; j++)
{
offset = j*4;
outFrame[0 + offset] = inFrame[0];
outFrame[1 + offset] = inFrame[1];
outFrame[2 + offset] = inFrame[2];
outFrame[3 + offset] = 0xff;
inFrame += 3;
}
outFrame -= 4 * (dst_stride - src_width);
inFrame += src_stride - src_width;
}
return 0;
}
int
ConvertRGB24ToI420(const uint8* src_frame, int src_stride,
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
{
return -1;
}
uint8* yStartPtr;
uint8* yStartPtr2;
uint8* uStartPtr;
uint8* vStartPtr;
const uint8* inpPtr;
const uint8* inpPtr2;
int h, w;
// Assuming RGB in a bottom up orientation.
yStartPtr = dst_yplane;
yStartPtr2 = yStartPtr + dst_ystride;
uStartPtr = dst_uplane;
vStartPtr =dst_vplane;
inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_height;
inpPtr2 = inpPtr - 3 * src_stride;
for (h = 0; h < (src_height >> 1); h++ )
{
for (w = 0; w < (src_width >> 1); w++)
{
// Y
yStartPtr[0] = (uint8)((66 * inpPtr[2] + 129 * inpPtr[1]
+ 25 * inpPtr[0] + 128) >> 8) + 16;
yStartPtr2[0] = (uint8)((66 * inpPtr2[2] + 129 * inpPtr2[1]
+ 25 * inpPtr2[0] + 128) >> 8) + 16;
// Moving to next column
yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[4]
+ 25 * inpPtr[3] + 128) >> 8) + 16;
yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[4]
+ 25 * inpPtr2[3] + 128) >> 8 ) + 16;
// U
uStartPtr[0] = (uint8)((-38 * inpPtr[2] - 74 * inpPtr[1] +
112 * inpPtr[0] + 128) >> 8) + 128;
// V
vStartPtr[0] = (uint8)((112 * inpPtr[2] -94 * inpPtr[1] -
18 * inpPtr[0] + 128) >> 8) + 128;
yStartPtr += 2;
yStartPtr2 += 2;
uStartPtr++;
vStartPtr++;
inpPtr += 6;
inpPtr2 += 6;
} // end for w
yStartPtr += dst_ystride + dst_ystride - src_width;
yStartPtr2 += dst_ystride + dst_ystride - src_width;
uStartPtr += dst_ustride + dst_ustride - (src_width >> 1);
vStartPtr += dst_vstride + dst_vstride - (src_width >> 1);
inpPtr -= 3 * (2 * src_stride + src_width);
inpPtr2 -= 3 * (2 * src_stride + src_width);
} // end for h
return 0;
}
int
ConvertYV12ToARGB(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
int32 diff = dst_stride - src_width;
uint8* out = dst_frame;
uint8* out2 = out + dst_stride * 4;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
v = src_vplane;
u = src_uplane;
int32 tmpG, tmpB, tmpR;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time
for (w = 0; w < (src_width >> 1); w++)
{
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128 )>> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128 )>> 8);
out[2] = Clip(tmpR);
out[1] = Clip(tmpG);
out[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[2] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128)>> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[6] = Clip(tmpR);
out[5] = Clip(tmpG);
out[4] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[6] = Clip(tmpR);
out2[5] = Clip(tmpG);
out2[4] = Clip(tmpB);
out[3] = 0xff;
out[7] = 0xff;
out += 8;
out2[3] = 0xff;
out2[7] = 0xff;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out += 4 * (2 * dst_stride - src_width);
out2 += 4 * (2 * dst_stride - src_width);
}
return 0;
}
int
ConvertABGRToI420(const uint8* src_frame, int src_stride,
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
{
return -1;
}
uint8* yStartPtr;
uint8* yStartPtr2;
uint8* uStartPtr;
uint8* vStartPtr;
const uint8* inpPtr;
const uint8* inpPtr2;
yStartPtr = dst_yplane;
yStartPtr2 = yStartPtr + dst_ystride;
uStartPtr = dst_uplane;
vStartPtr = dst_vplane;
inpPtr = src_frame;
inpPtr2 = inpPtr + 4 * src_stride;
int h, w;
for (h = 0; h < (src_height >> 1); h++)
{
for (w = 0; w < (src_height >> 1); w++)
{
// Y
yStartPtr[0] = (uint8)((66 * inpPtr[1] + 129 * inpPtr[2]
+ 25 * inpPtr[3] + 128) >> 8) + 16;
yStartPtr2[0] = (uint8)((66 * inpPtr2[1] + 129 * inpPtr2[2]
+ 25 * inpPtr2[3] + 128) >> 8) + 16;
// Moving to next column
yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[6]
+ 25 * inpPtr[7] + 128) >> 8) + 16;
yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[6]
+ 25 * inpPtr2[7] + 128) >> 8) + 16;
// U
uStartPtr[0] = (uint8)((-38 * inpPtr[1] - 74 * inpPtr[2]
+ 112 * inpPtr[3] + 128) >> 8) + 128;
// V
vStartPtr[0] = (uint8)((112 * inpPtr[1] - 94 * inpPtr[2]
- 18 * inpPtr[3] + 128) >> 8) + 128;
yStartPtr += 2;
yStartPtr2 += 2;
uStartPtr++;
vStartPtr++;
inpPtr += 8;
inpPtr2 += 8;
}
yStartPtr += 2 * dst_ystride - src_width;
yStartPtr2 += 2 * dst_ystride - src_width;
uStartPtr += dst_ustride + dst_ustride - (src_width >> 1);
vStartPtr += dst_vstride + dst_vstride - (src_width >> 1);
inpPtr += 4 * (2 * src_stride - src_width);
inpPtr2 += 4 * (2 * src_stride - src_width);
}
return 0;
}
inline
uint8 Clip(int32 val)
{
if (val < 0)
{
return (uint8)0;
} else if (val > 255)
{
return (uint8)255;
}
return (uint8)val;
}
#ifdef SCALEOPT
//memcpy_16 assumes that width is an integer multiple of 16!
void
*memcpy_16(void * dest, const void * src, size_t n)
{
_asm
{
mov eax, dword ptr [src]
mov ebx, dword ptr [dest]
mov ecx, dword ptr [n]
loop0:
movdqu xmm0, XMMWORD PTR [eax]
movdqu XMMWORD PTR [ebx], xmm0
add eax, 16
add ebx, 16
sub ecx, 16
jg loop0
}
}
// memcpy_8 assumes that width is an integer multiple of 8!
void
*memcpy_8(void * dest, const void * src, size_t n)
{
_asm
{
mov eax, dword ptr [src]
mov ebx, dword ptr [dest]
mov ecx, dword ptr [n]
loop0:
movq mm0, QWORD PTR [eax]
movq QWORD PTR [ebx], mm0
add eax, 8
add ebx, 8
sub ecx, 8
jg loop0
emms
}
}
#endif
} // namespace libyuv