/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/rotate_argb.h"

#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h"
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static int ARGBTranspose(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  int i;
  int src_pixel_step = src_stride_argb >> 2;
  void (*ScaleARGBRowDownEven)(
      const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step,
      uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C;
  // Check stride is a multiple of 4.
  if (src_stride_argb & 3) {
    return -1;
  }
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_LSX;
    if (IS_ALIGNED(height, 4)) {  // Width of dest.
      ScaleARGBRowDownEven = ScaleARGBRowDownEven_LSX;
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
  }
#endif

  for (i = 0; i < width; ++i) {  // column of source to row of dest.
    ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
    dst_argb += dst_stride_argb;
    src_argb += 4;
  }
  return 0;
}
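
// The loop above reads source column i (starting at src_argb + 4 * i and
// stepping src_pixel_step pixels, i.e. one source row, per element) into
// destination row i, so dst(x, y) = src(y, x). A minimal sketch of the same
// mapping in plain C, for a hypothetical width x height image held as packed
// uint32_t pixels (illustration only, not part of the library):
//
//   for (int y = 0; y < width; ++y) {     // dest row = source column
//     for (int x = 0; x < height; ++x) {  // dest column = source row
//       dst[y * height + x] = src[x * width + y];
//     }
//   }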

static int ARGBRotate90(const uint8_t* src_argb,
                        int src_stride_argb,
                        uint8_t* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height) {
  // Rotate by 90 is an ARGBTranspose with the source read
  // from bottom to top. So set the source pointer to the end
  // of the buffer and flip the sign of the source stride.
  src_argb += src_stride_argb * (height - 1);
  src_stride_argb = -src_stride_argb;
  return ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                       width, height);
}
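
// In coordinates: flipping the source vertically before the transpose gives
// dst(x, y) = src(y, height - 1 - x), a clockwise 90 degree rotation. For
// example, the top-left source pixel src(0, 0) lands at dst(height - 1, 0),
// the top-right corner of the rotated image.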

static int ARGBRotate270(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  // Rotate by 270 is an ARGBTranspose with the destination written
  // from bottom to top. So set the destination pointer to the end
  // of the buffer and flip the sign of the destination stride.
  dst_argb += dst_stride_argb * (width - 1);
  dst_stride_argb = -dst_stride_argb;
  return ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                       width, height);
}
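
// In coordinates: writing the transposed rows bottom-up gives
// dst(x, y) = src(width - 1 - y, x), a counter-clockwise 90 degree rotation.
// The top-left source pixel src(0, 0) lands at dst(0, width - 1), the
// bottom-left corner of the rotated image.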

static int ARGBRotate180(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  // Swap first and last row and mirror the content. Uses a temporary row.
  const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
  uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
  int half_height = (height + 1) >> 1;
  int y;
  void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBMirrorRow_C;
  void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      CopyRow_C;
  align_buffer_64(row, width * 4);
  if (!row)
    return 1;
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_MSA;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_LSX;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
    if (IS_ALIGNED(width, 16)) {
      ARGBMirrorRow = ARGBMirrorRow_LASX;
    }
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_AVX512BW)
  if (TestCpuFlag(kCpuHasAVX512BW)) {
    CopyRow =
        IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif
#if defined(HAS_COPYROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    CopyRow = CopyRow_RVV;
  }
#endif

  // Odd height will harmlessly mirror the middle row twice.
  for (y = 0; y < half_height; ++y) {
    ARGBMirrorRow(src_argb, row, width);      // Mirror first row into a buffer
    ARGBMirrorRow(src_bot, dst_argb, width);  // Mirror last row into first row
    CopyRow(row, dst_bot, width * 4);  // Copy first mirrored row into last
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
    src_bot -= src_stride_argb;
    dst_bot -= dst_stride_argb;
  }
  free_aligned_buffer_64(row);
  return 0;
}
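
// Each loop iteration above fills two output rows: dst row y receives the
// mirrored source row height - 1 - y (via src_bot), and dst row
// height - 1 - y receives the mirrored source row y, staged through the
// temporary `row`. Overall, dst(x, y) = src(width - 1 - x, height - 1 - y).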

LIBYUV_API
int ARGBRotate(const uint8_t* src_argb,
               int src_stride_argb,
               uint8_t* dst_argb,
               int dst_stride_argb,
               int width,
               int height,
               enum RotationMode mode) {
  if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  switch (mode) {
    case kRotate0:
      // copy frame
      return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                      width, height);
    case kRotate90:
      return ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
                          width, height);
    case kRotate270:
      return ARGBRotate270(src_argb, src_stride_argb, dst_argb,
                           dst_stride_argb, width, height);
    case kRotate180:
      return ARGBRotate180(src_argb, src_stride_argb, dst_argb,
                           dst_stride_argb, width, height);
    default:
      break;
  }
  return -1;
}
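
// A minimal usage sketch (hypothetical caller, not part of the library):
// rotating by 90 or 270 degrees swaps the output dimensions, so for a
// width x height source the destination is height pixels wide and width
// pixels tall, giving a packed destination stride of height * 4 bytes.
//
//   #include "libyuv/rotate_argb.h"
//
//   // `dst` must hold height * width * 4 bytes.
//   int RotateClockwise(const uint8_t* src, int width, int height,
//                       uint8_t* dst) {
//     return libyuv::ARGBRotate(src, width * 4,   // packed source rows
//                               dst, height * 4,  // rotated rows: height px
//                               width, height, libyuv::kRotate90);
//   }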

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif