libyuv/unit_test
Frank Barchard 451af5e922 scale by 1 for neon implemented
// Converts a row of unsigned 16-bit integers to 16-bit half floats, with no
// scaling applied (the unnamed float parameter is ignored).
//
//   src   - input row of 16-bit values; read 8 at a time.
//   dst   - output row of 16-bit half-float bit patterns; written 8 at a time.
//   width - number of elements; decremented by 8 per iteration.
//
// The loop body runs before width is tested, so at least 8 elements are always
// processed and there is no tail handling here.
// NOTE(review): presumably callers guarantee width is a positive multiple of 8
// and handle any remainder elsewhere - confirm against the call sites.
void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
  asm volatile (
  "1:                                          \n"
    // MEMACCESS is a macro defined outside this block; it annotates the
    // memory operand (by operand index) used by the next instruction.
    MEMACCESS(0)
    "ld1        {v1.16b}, [%0], #16            \n"  // load 8 shorts, src += 16 bytes
    "subs       %w2, %w2, #8                   \n"  // 8 pixels per loop; sets flags for b.gt
    "uxtl       v2.4s, v1.4h                   \n"  // zero-extend low 4 shorts to 32 bits
    "uxtl2      v1.4s, v1.8h                   \n"  // zero-extend high 4 shorts to 32 bits
    "scvtf      v2.4s, v2.4s                   \n"  // int -> float (low 4)
    "scvtf      v1.4s, v1.4s                   \n"  // int -> float (high 4)
    "fcvtn      v4.4h, v2.4s                   \n"  // float -> half float (low 4)
    "fcvtn2     v4.8h, v1.4s                   \n"  // float -> half float (high 4)
   MEMACCESS(1)
    "st1        {v4.16b}, [%1], #16            \n"  // store 8 shorts, dst += 16 bytes
    "b.gt       1b                             \n"  // loop while width > 0
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  :
  : "cc", "memory", "v1", "v2", "v4"
  );
}

// Converts a row of unsigned 16-bit integers to 16-bit half floats, multiplying
// each value by scale on the way.
//
//   src   - input row of 16-bit values; read 8 at a time.
//   dst   - output row of 16-bit half-float bit patterns; written 8 at a time.
//   scale - multiplier applied to every element before conversion.
//   width - number of elements; decremented by 8 per iteration.
//
// Instead of a float->half convert instruction, this builds the half-float bit
// pattern with integer operations: each value is multiplied by
// scale * 1.9259299444e-34f (approximately 2^-112, the difference between the
// single-precision exponent bias 127 and the half-precision bias 15), and the
// resulting float bits are then shifted right by 13 (23 - 10 mantissa bits)
// with unsigned saturation. The shift is non-rounding, so discarded mantissa
// bits are truncated.
//
// The loop body runs before width is tested, so at least 8 elements are always
// processed and there is no tail handling here.
// NOTE(review): presumably callers guarantee width is a positive multiple of 8
// and handle any remainder elsewhere - confirm against the call sites.
void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
  asm volatile (
  "1:                                          \n"
    // MEMACCESS is a macro defined outside this block; it annotates the
    // memory operand (by operand index) used by the next instruction.
    MEMACCESS(0)
    "ld1        {v1.16b}, [%0], #16            \n"  // load 8 shorts, src += 16 bytes
    "subs       %w2, %w2, #8                   \n"  // 8 pixels per loop; sets flags for b.gt
    "uxtl       v2.4s, v1.4h                   \n"  // zero-extend low 4 shorts to 32 bits
    "uxtl2      v1.4s, v1.8h                   \n"  // zero-extend high 4 shorts to 32 bits
    "scvtf      v2.4s, v2.4s                   \n"  // int -> float (low 4)
    "scvtf      v1.4s, v1.4s                   \n"  // int -> float (high 4)
    "fmul       v2.4s, v2.4s, %3.s[0]          \n"  // apply scale and re-bias exponent
    "fmul       v1.4s, v1.4s, %3.s[0]          \n"
    "uqshrn     v4.4h, v2.4s, #13              \n"  // float bits >> 13 -> half float (low 4)
    "uqshrn2    v4.8h, v1.4s, #13              \n"  // float bits >> 13 -> half float (high 4)
   MEMACCESS(1)
    "st1        {v4.16b}, [%1], #16            \n"  // store 8 shorts, dst += 16 bytes
    "b.gt       1b                             \n"  // loop while width > 0
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(width)   // %2
  : "w"(scale * 1.9259299444e-34f)    // %3: scale pre-multiplied by ~2^-112
  : "cc", "memory", "v1", "v2", "v4"
  );
}

TEST=LibYUVPlanarTest.TestHalfFloatPlane_One
BUG=libyuv:560
R=hubbe@chromium.org

Review URL: https://codereview.chromium.org/2430313008 .
2016-10-21 14:30:03 -07:00
..
testdata Detect asimd as same as Neon for Arm features. Used on Juno aarch64 linux. 2014-09-22 18:30:17 +00:00
basictypes_test.cc break up unittests into categories 2015-10-13 16:01:07 -07:00
color_test.cc j422 now uses j420 source code so increase error threshold to match. 2016-05-31 19:45:34 -07:00
compare_test.cc Cast for clang-cl 64 bit build warnings in unittests 2016-10-12 13:09:57 -07:00
convert_test.cc Cast for clang-cl 64 bit build warnings in unittests 2016-10-12 13:09:57 -07:00
cpu_test.cc Add f16c (halffloat) cpuid 2016-10-14 16:34:08 -07:00
math_test.cc remove row.h from unittests 2016-05-27 10:57:49 -07:00
planar_test.cc scale by 1 for neon implemented 2016-10-21 14:30:03 -07:00
rotate_argb_test.cc remove row.h from unittests 2016-05-27 10:57:49 -07:00
rotate_test.cc remove row.h from unittests 2016-05-27 10:57:49 -07:00
scale_argb_test.cc remove row.h from unittests 2016-05-27 10:57:49 -07:00
scale_test.cc YUV scale filter columns improved filtering accuracy 2016-06-23 20:16:55 -07:00
unit_test.cc document cpuid command line behavior 2016-06-08 10:38:09 -07:00
unit_test.h make unittest allocator align to 64 bytes. 2016-05-27 18:02:47 -07:00
video_common_test.cc Cast for clang-cl 64 bit build warnings in unittests 2016-10-12 13:09:57 -07:00