diff --git a/libyuv_test.gyp b/libyuv_test.gyp index 4429a1de0..45c184786 100644 --- a/libyuv_test.gyp +++ b/libyuv_test.gyp @@ -71,6 +71,17 @@ }], ], # conditions }, + # TODO(fbarchard): Enable SSE2 and OpenMP for better performance. + { + 'target_name': 'psnr', + 'type': 'executable', + 'sources': [ + # sources + 'util/psnr_main.cc', + 'util/psnr.cc', + 'util/ssim.cc', + ], + }, ], # targets } diff --git a/util/psnr.cc b/util/psnr.cc new file mode 100644 index 000000000..a7f6c741b --- /dev/null +++ b/util/psnr.cc @@ -0,0 +1,242 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./psnr.h" + +#include + +#ifdef _OPENMP +#include +#endif +#ifdef _MSC_VER +#include // For __cpuid() +#endif + +typedef unsigned int uint32; // NOLINT +#ifdef _MSC_VER +typedef unsigned __int64 uint64; +#else // COMPILER_MSVC +#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) +typedef unsigned long uint64; // NOLINT +#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) +typedef unsigned long long uint64; // NOLINT +#endif // __LP64__ +#endif // _MSC_VER + +// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse) +double ComputePSNR(double sse, double size) { + const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.); + if (sse <= kMINSSE) + sse = kMINSSE; // Produces max PSNR of 128 + return 10.0 * log10(65025.0 * size / sse); +} + +#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) +#define HAS_SUMSQUAREERROR_NEON +static uint32 SumSquareError_NEON(const uint8* src_a, + const uint8* src_b, int count) { + volatile uint32 sse; + asm volatile ( + "vmov.u8 q7, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" + + "1: \n" + "vld1.u8 {q0}, [%0]! \n" + "vld1.u8 {q1}, [%1]! \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q7, d4, d4 \n" + "vmlal.s16 q8, d6, d6 \n" + "vmlal.s16 q8, d5, d5 \n" + "vmlal.s16 q10, d7, d7 \n" + "subs %2, %2, #16 \n" + "bhi 1b \n" + + "vadd.u32 q7, q7, q8 \n" + "vadd.u32 q9, q9, q10 \n" + "vadd.u32 q10, q7, q9 \n" + "vpaddl.u32 q1, q10 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), + "+r"(src_b), + "+r"(count), + "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10" + ); + return sse; +} +#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) +#define HAS_SUMSQUAREERROR_SSE2 +__declspec(naked) +static uint32 SumSquareError_SSE2(const uint8* /*src_a*/, + const uint8* /*src_b*/, int /*count*/) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + pxor xmm0, xmm0 + pxor xmm5, xmm5 + sub edx, eax + + wloop: + movdqu xmm1, [eax] + movdqu xmm2, [eax + edx] + lea eax, [eax + 16] + movdqu xmm3, xmm1 + psubusb xmm1, xmm2 + psubusb xmm2, xmm3 + por xmm1, xmm2 + movdqu xmm2, xmm1 + punpcklbw xmm1, xmm5 + punpckhbw xmm2, xmm5 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm0, xmm1 + paddd xmm0, xmm2 + sub ecx, 16 + ja wloop + + pshufd xmm1, xmm0, 0EEh + paddd xmm0, xmm1 + pshufd xmm1, xmm0, 01h + paddd xmm0, xmm1 + movd eax, xmm0 + ret + } +} +#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_SUMSQUAREERROR_SSE2 +static uint32 SumSquareError_SSE2(const uint8* src_a, + const uint8* src_b, int count) { + uint32 sse; + asm volatile ( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + "sub %0,%1 \n" + + "1: \n" + "movdqu (%0),%%xmm1 \n" + "movdqu (%0,%1,1),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "movdqu %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqu %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "sub $0x10,%2 \n" + "ja 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm5" +#endif + ); + return sse; +} +#endif // LIBYUV_DISABLE_X86 etc + +#if defined(HAS_SUMSQUAREERROR_SSE2) +#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) +static __inline void __cpuid(int cpu_info[4], int info_type) { + asm volatile ( + "mov %%ebx, %%edi \n" + "cpuid \n" + "xchg %%edi, %%ebx \n" + : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type) + ); +} +#elif defined(__i386__) || defined(__x86_64__) +static __inline void __cpuid(int cpu_info[4], int info_type) { + asm volatile ( + "cpuid \n" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type) + ); +} +#endif + +static int CpuHasSSE2() { +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) + int cpu_info[4]; + __cpuid(cpu_info, 1); + if (cpu_info[3] & 0x04000000) { + return 1; + } +#endif + return 0; +} +#endif // HAS_SUMSQUAREERROR_SSE2 + +static uint32 SumSquareError_C(const uint8* src_a, + const uint8* src_b, int count) { + uint32 sse = 0u; + for (int x = 0; x < count; ++x) { + int diff = src_a[x] - src_b[x]; + sse += static_cast(diff * diff); + } + return sse; +} + +double ComputeSumSquareError(const uint8* src_a, + const uint8* src_b, int count) { + uint32 (*SumSquareError)(const uint8* src_a, + const uint8* src_b, int count) = SumSquareError_C; +#if defined(HAS_SUMSQUAREERROR_NEON) + SumSquareError = SumSquareError_NEON; +#endif +#if defined(HAS_SUMSQUAREERROR_SSE2) + if (CpuHasSSE2()) { + SumSquareError = SumSquareError_SSE2; + } +#endif + const int kBlockSize = 1 << 15; + uint64 sse = 0; +#ifdef _OPENMP +#pragma omp parallel for reduction(+: sse) +#endif + for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { + sse += SumSquareError(src_a + i, src_b + i, kBlockSize); + } + src_a += count & ~(kBlockSize - 1); + src_b += count & ~(kBlockSize - 1); + int remainder = count & (kBlockSize - 1) & ~15; + if (remainder) { + sse += SumSquareError(src_a, src_b, remainder); + src_a += remainder; + src_b += remainder; + } + remainder = count & 15; + if (remainder) { + sse += SumSquareError_C(src_a, src_b, remainder); + } + return static_cast(sse); +} diff --git a/util/psnr.h b/util/psnr.h new file mode 100644 index 000000000..5e6c2b8e5 --- /dev/null +++ b/util/psnr.h @@ -0,0 +1,28 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Get PSNR for video sequence. Assuming RAW 4:2:0 Y:Cb:Cr format + +#ifndef UTIL_PSNR_H_ +#define UTIL_PSNR_H_ + +typedef unsigned char uint8; + +static const double kMaxPSNR = 128.0; + +// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse). +// Returns 128.0 (kMaxPSNR) if sse is 0 (perfect match). +double ComputePSNR(double sse, double size); + +// Computer Sum of Squared Error (SSE). +// Pass this to ComputePSNR for final result. +double ComputeSumSquareError(const uint8* org, const uint8* rec, int size); + +#endif // UTIL_PSNR_H_ diff --git a/util/psnr_main.cc b/util/psnr_main.cc new file mode 100644 index 000000000..1658ef178 --- /dev/null +++ b/util/psnr_main.cc @@ -0,0 +1,571 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Get PSNR or SSIM for video sequence. Assuming RAW 4:2:0 Y:Cb:Cr format +// To build: g++ -O3 -o psnr psnr.cc ssim.cc psnr_main.cc +// or VisualC: cl /Ox psnr.cc ssim.cc psnr_main.cc +// +// To enable OpenMP and SSE2 +// gcc: g++ -msse2 -O3 -fopenmp -o psnr psnr.cc ssim.cc psnr_main.cc +// vc: cl /arch:SSE2 /Ox /openmp psnr.cc ssim.cc psnr_main.cc +// +// Usage: psnr org_seq rec_seq -s width height [-skip skip_org skip_rec] + +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include +#include +#ifdef _OPENMP +#include +#endif + +#include "./psnr.h" +#include "./ssim.h" + +struct metric { + double y, u, v, all; + double min_y, min_u, min_v, min_all; + double global_y, global_u, global_v, global_all; + int min_frame; +}; + +// options +bool verbose = false; +bool quiet = false; +bool show_name = false; +bool do_swap_uv = false; +bool do_psnr = false; +bool do_ssim = false; +bool do_mse = false; +bool do_lssim = false; +int image_width = 0, image_height = 0; +int fileindex_org = 0; +int fileindex_rec = 0; +int num_rec = 0; +int num_skip_org = 0; +int num_skip_rec = 0; +int num_frames = 0; +int do_yscale = 0; +int num_threads = 0; + +bool ExtractResolutionFromYuvFilename(const char *name, + int* width_ptr, + int* height_ptr) { + // Isolate the .width_heigth. section of the filename by searching for the + // one before last dot in the filename. + int dot1 = -1, dot2 = -1; + int i = 0; + while (name[i]) { + if ('.' == name[i]) { + dot1 = dot2; + dot2 = i; + } + i++; + } + if (-1 == dot1) { + return false; + } + + // Parse PYUV format. ie name.1920x800_24Hz_P420.yuv + int args = sscanf(name + dot1, ".%dx%d", width_ptr, height_ptr); + + // Parse .width_height.yuv + if (args != 2) { + args = sscanf(name + dot1, ".%d_%d.yuv", width_ptr, height_ptr); + } + return (2 == args); +} + + +// Scale Y channel from 16..240 to 0..255. +// This can be useful when comparing codecs that are inconsistant about Y +uint8 ScaleY(uint8 y) { + int ny = (y - 16) * 256 / 224; + if (ny < 0) ny = 0; + if (ny > 255) ny = 255; + return static_cast(ny); +} + +// MSE = Mean Square Error +double GetMSE(double sse, double size) { + return sse / size; +} + +void PrintHelp(const char * program) { + printf("%s [-options] org_seq rec_seq [rec_seq2.. etc]\n", program); + printf("options:\n"); + printf(" -s .... specify YUV size, mandatory if none of the " + "sequences have the\n"); + printf(" resolution embedded in their filename (ie. " + "bali.1920x800_24Hz_P420.yuv)\n"); + printf(" -psnr .................. compute PSNR (default)\n"); + printf(" -ssim .................. compute SSIM\n"); + printf(" -mse ................... compute MSE\n"); + printf(" -swap .................. Swap U and V plane\n"); + printf(" -skip ...... Number of frame to skip of org and rec\n"); + printf(" -frames .......... Number of frames to compare\n"); + printf(" -t ............... Number of threads\n"); + printf(" -yscale ................ Scale org Y values of 16..240 to 0..255\n"); + printf(" -n ..................... Show file name\n"); + printf(" -v ..................... verbose++\n"); + printf(" -q ..................... quiet\n"); + printf(" -h ..................... this help\n"); + exit(0); +} + +void ParseOptions(int argc, const char *argv[]) { + if (argc <= 1) PrintHelp(argv[0]); + for (int c = 1; c < argc; ++c) { + if (!strcmp(argv[c], "-v")) { + verbose = true; + } else if (!strcmp(argv[c], "-q")) { + quiet = true; + } else if (!strcmp(argv[c], "-n")) { + show_name = true; + } else if (!strcmp(argv[c], "-psnr")) { + do_psnr = true; + } else if (!strcmp(argv[c], "-mse")) { + do_mse = true; + } else if (!strcmp(argv[c], "-ssim")) { + do_ssim = true; + } else if (!strcmp(argv[c], "-lssim")) { + do_ssim = true; + do_lssim = true; + } else if (!strcmp(argv[c], "-swap")) { + do_swap_uv = true; + } else if (!strcmp(argv[c], "-yscale")) { + do_yscale = true; + } else if (!strcmp(argv[c], "-h") || !strcmp(argv[c], "-help")) { + PrintHelp(argv[0]); + } else if (!strcmp(argv[c], "-s") && c + 2 < argc) { + image_width = atoi(argv[++c]); // NOLINT + image_height = atoi(argv[++c]); // NOLINT + } else if (!strcmp(argv[c], "-skip") && c + 2 < argc) { + num_skip_org = atoi(argv[++c]); // NOLINT + num_skip_rec = atoi(argv[++c]); // NOLINT + } else if (!strcmp(argv[c], "-frames") && c + 1 < argc) { + num_frames = atoi(argv[++c]); // NOLINT + } else if (!strcmp(argv[c], "-t") && c + 1 < argc) { + num_threads = atoi(argv[++c]); // NOLINT + } else if (fileindex_org == 0) { + fileindex_org = c; + } else if (fileindex_rec == 0) { + fileindex_rec = c; + num_rec = 1; + } else { + ++num_rec; + } + } + if (fileindex_org == 0 || fileindex_rec == 0) { + printf("Missing filenames\n"); + PrintHelp(argv[0]); + } + if (num_skip_org < 0 || num_skip_rec < 0) { + printf("Skipped frames incorrect\n"); + PrintHelp(argv[0]); + } + if (num_frames < 0) { + printf("Number of frames incorrect\n"); + PrintHelp(argv[0]); + } + if (image_width <= 0 || image_height <=0) { + int org_width, org_height; + int rec_width, rec_height; + bool org_res_avail = ExtractResolutionFromYuvFilename(argv[fileindex_org], + &org_width, + &org_height); + bool rec_res_avail = ExtractResolutionFromYuvFilename(argv[fileindex_rec], + &rec_width, + &rec_height); + if (org_res_avail) { + if (rec_res_avail) { + if ((org_width == rec_width) && (org_height == rec_height)) { + image_width = org_width; + image_height = org_height; + } else { + printf("Sequences have different resolutions.\n"); + PrintHelp(argv[0]); + } + } else { + image_width = org_width; + image_height = org_height; + } + } else if (rec_res_avail) { + image_width = rec_width; + image_height = rec_height; + } else { + printf("Missing dimensions.\n"); + PrintHelp(argv[0]); + } + } +} + +bool UpdateMetrics(uint8* ch_org, uint8* ch_rec, + const int y_size, const int uv_size, const size_t total_size, + int number_of_frames, + metric* cur_distortion_psnr, + metric* distorted_frame, bool do_psnr) { + const int uv_offset = (do_swap_uv ? uv_size : 0); + const uint8* const u_org = ch_org + y_size + uv_offset; + const uint8* const u_rec = ch_rec + y_size; + const uint8* const v_org = ch_org + y_size + (uv_size - uv_offset); + const uint8* const v_rec = ch_rec + y_size + uv_size; + if (do_yscale) { + for (int i = 0; i < image_width*image_height; ++i) { + ch_org[i] = ScaleY(ch_org[i]); + } + } + if (do_psnr) { + double y_err = ComputeSumSquareError(ch_org, ch_rec, y_size); + double u_err = ComputeSumSquareError(u_org, u_rec, uv_size); + double v_err = ComputeSumSquareError(v_org, v_rec, uv_size); + const double total_err = y_err + u_err + v_err; + cur_distortion_psnr->global_y += y_err; + cur_distortion_psnr->global_u += u_err; + cur_distortion_psnr->global_v += v_err; + cur_distortion_psnr->global_all += total_err; + distorted_frame->y = ComputePSNR(y_err, static_cast(y_size)); + distorted_frame->u = ComputePSNR(u_err, static_cast(uv_size)); + distorted_frame->v = ComputePSNR(v_err, static_cast(uv_size)); + distorted_frame->all = ComputePSNR(total_err, + static_cast(total_size)); + } else { + distorted_frame->y = CalcSSIM(ch_org, ch_rec, image_width, image_height); + distorted_frame->u = CalcSSIM(u_org, u_rec, image_width / 2, + image_height / 2); + distorted_frame->v = CalcSSIM(v_org, v_rec, image_width / 2, + image_height / 2); + distorted_frame->all = + (distorted_frame->y + distorted_frame->u + distorted_frame->v) + / total_size; + distorted_frame->y /= y_size; + distorted_frame->u /= uv_size; + distorted_frame->v /= uv_size; + + if (do_lssim) { + distorted_frame->all = CalcLSSIM(distorted_frame->all); + distorted_frame->y = CalcLSSIM(distorted_frame->y); + distorted_frame->u = CalcLSSIM(distorted_frame->u); + distorted_frame->v = CalcLSSIM(distorted_frame->v); + } + } + + cur_distortion_psnr->y += distorted_frame->y; + cur_distortion_psnr->u += distorted_frame->u; + cur_distortion_psnr->v += distorted_frame->v; + cur_distortion_psnr->all += distorted_frame->all; + + bool ismin = false; + if (distorted_frame->y < cur_distortion_psnr->min_y) + cur_distortion_psnr->min_y = distorted_frame->y; + if (distorted_frame->u < cur_distortion_psnr->min_u) + cur_distortion_psnr->min_u = distorted_frame->u; + if (distorted_frame->v < cur_distortion_psnr->min_v) + cur_distortion_psnr->min_v = distorted_frame->v; + if (distorted_frame->all < cur_distortion_psnr->min_all) { + cur_distortion_psnr->min_all = distorted_frame->all; + cur_distortion_psnr->min_frame = number_of_frames; + ismin = true; + } + return ismin; +} + +int main(int argc, const char *argv[]) { + ParseOptions(argc, argv); + if (!do_psnr && !do_ssim) { + do_psnr = true; + } + +#ifdef _OPENMP + if (num_threads) { + omp_set_num_threads(num_threads); + } + if (verbose) { + printf("OpenMP %d procs\n", omp_get_num_procs()); + } +#endif + // Open original file (first file argument) + FILE* const file_org = fopen(argv[fileindex_org], "rb"); + if (file_org == NULL) { + fprintf(stderr, "Cannot open %s\n", argv[fileindex_org]); + exit(1); + } + + // Open all files to compare to + FILE** file_rec = new FILE *[num_rec]; + memset(file_rec, 0, num_rec * sizeof(FILE*)); // NOLINT + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { + file_rec[cur_rec] = fopen(argv[fileindex_rec+cur_rec], "rb"); + if (file_rec[cur_rec] == NULL) { + fprintf(stderr, "Cannot open %s\n", argv[fileindex_rec+cur_rec]); + fclose(file_org); + for (int i = 0; i < cur_rec; ++i) { + fclose(file_rec[i]); + } + delete[] file_rec; + exit(1); + } + } + + const int y_size = image_width * image_height; + const int uv_size = (image_width >> 1) * (image_height >> 1); + const size_t total_size = y_size + 2 * uv_size; // NOLINT +#if defined(_MSC_VER) + _fseeki64(file_org, + static_cast<__int64>(num_skip_org) * + static_cast<__int64>(total_size), SEEK_SET); +#else + fseek(file_org, num_skip_org * total_size, SEEK_SET); +#endif + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { +#if defined(_MSC_VER) + _fseeki64(file_rec[cur_rec], + static_cast<__int64>(num_skip_rec) * + static_cast<__int64>(total_size), + SEEK_SET); +#else + fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET); +#endif + } + + uint8* const ch_org = new uint8[total_size]; + uint8* const ch_rec = new uint8[total_size]; + if (ch_org == NULL || ch_rec == NULL) { + fprintf(stderr, "No memory available\n"); + fclose(file_org); + for (int i = 0; i < num_rec; ++i) { + fclose(file_rec[i]); + } + delete[] ch_org; + delete[] ch_rec; + delete[] file_rec; + exit(1); + } + + metric* const distortion_psnr = new metric[num_rec]; + metric* const distortion_ssim = new metric[num_rec]; + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { + metric* cur_distortion_psnr = &distortion_psnr[cur_rec]; + cur_distortion_psnr->y = 0.0; + cur_distortion_psnr->u = 0.0; + cur_distortion_psnr->v = 0.0; + cur_distortion_psnr->all = 0.0; + cur_distortion_psnr->min_y = kMaxPSNR; + cur_distortion_psnr->min_u = kMaxPSNR; + cur_distortion_psnr->min_v = kMaxPSNR; + cur_distortion_psnr->min_all = kMaxPSNR; + cur_distortion_psnr->min_frame = 0; + cur_distortion_psnr->global_y = 0.0; + cur_distortion_psnr->global_u = 0.0; + cur_distortion_psnr->global_v = 0.0; + cur_distortion_psnr->global_all = 0.0; + distortion_ssim[cur_rec] = cur_distortion_psnr[cur_rec]; + } + + if (verbose) { + printf("Size: %dx%d\n", image_width, image_height); + } + + if (!quiet) { + printf("Frame"); + if (do_psnr) { + printf("\t PSNR-Y \t PSNR-U \t PSNR-V \t PSNR-All \t Frame"); + } + if (do_ssim) { + printf("\t SSIM-Y\t SSIM-U\t SSIM-V\t SSIM-All\t Frame"); + } + if (show_name) { + printf("\tName\n"); + } else { + printf("\n"); + } + } + + int number_of_frames; + for (number_of_frames = 0; ; ++number_of_frames) { + if (num_frames && number_of_frames >= num_frames) + break; + + size_t bytes_org = fread(ch_org, sizeof(uint8), total_size, file_org); + if (bytes_org < total_size) + break; + + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { + size_t bytes_rec = fread(ch_rec, sizeof(uint8), + total_size, file_rec[cur_rec]); + if (bytes_rec < total_size) + break; + + if (verbose) { + printf("%5d", number_of_frames); + } + if (do_psnr) { + metric distorted_frame; + metric* cur_distortion_psnr = &distortion_psnr[cur_rec]; + bool ismin = UpdateMetrics(ch_org, ch_rec, + y_size, uv_size, total_size, + number_of_frames, + cur_distortion_psnr, + &distorted_frame, true); + if (verbose) { + printf("\t%10.6f", distorted_frame.y); + printf("\t%10.6f", distorted_frame.u); + printf("\t%10.6f", distorted_frame.v); + printf("\t%10.6f", distorted_frame.all); + printf("\t%5s", ismin ? "min" : ""); + } + } + if (do_ssim) { + metric distorted_frame; + metric* cur_distortion_ssim = &distortion_ssim[cur_rec]; + bool ismin = UpdateMetrics(ch_org, ch_rec, + y_size, uv_size, total_size, + number_of_frames, + cur_distortion_ssim, + &distorted_frame, false); + if (verbose) { + printf("\t%10.6f", distorted_frame.y); + printf("\t%10.6f", distorted_frame.u); + printf("\t%10.6f", distorted_frame.v); + printf("\t%10.6f", distorted_frame.all); + printf("\t%5s", ismin ? "min" : ""); + } + } + if (verbose) { + if (show_name) { + printf("\t%s", argv[fileindex_rec+cur_rec]); + } + printf("\n"); + } + } + } + + // Final PSNR computation. + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { + metric* cur_distortion_psnr = &distortion_psnr[cur_rec]; + metric* cur_distortion_ssim = &distortion_ssim[cur_rec]; + if (number_of_frames > 0) { + const double norm = 1. / static_cast(number_of_frames); + cur_distortion_psnr->y *= norm; + cur_distortion_psnr->u *= norm; + cur_distortion_psnr->v *= norm; + cur_distortion_psnr->all *= norm; + cur_distortion_ssim->y *= norm; + cur_distortion_ssim->u *= norm; + cur_distortion_ssim->v *= norm; + cur_distortion_ssim->all *= norm; + } + + if (do_psnr) { + const double global_psnr_y = ComputePSNR( + cur_distortion_psnr->global_y, + static_cast(y_size) * number_of_frames); + const double global_psnr_u = ComputePSNR( + cur_distortion_psnr->global_u, + static_cast(uv_size) * number_of_frames); + const double global_psnr_v = ComputePSNR( + cur_distortion_psnr->global_v, + static_cast(uv_size) * number_of_frames); + const double global_psnr_all = ComputePSNR( + cur_distortion_psnr->global_all, + static_cast(total_size) * number_of_frames); + printf("Global:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + global_psnr_y, + global_psnr_u, + global_psnr_v, + global_psnr_all, + number_of_frames); + if (show_name) { + printf("\t%s", argv[fileindex_rec+cur_rec]); + } + printf("\n"); + } + + if (!quiet) { + printf("Avg:"); + if (do_psnr) { + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + cur_distortion_psnr->y, + cur_distortion_psnr->u, + cur_distortion_psnr->v, + cur_distortion_psnr->all, + number_of_frames); + } + if (do_ssim) { + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + cur_distortion_ssim->y, + cur_distortion_ssim->u, + cur_distortion_ssim->v, + cur_distortion_ssim->all, + number_of_frames); + } + if (show_name) { + printf("\t%s", argv[fileindex_rec+cur_rec]); + } + printf("\n"); + } + if (!quiet) { + printf("Min:"); + if (do_psnr) { + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + cur_distortion_psnr->min_y, + cur_distortion_psnr->min_u, + cur_distortion_psnr->min_v, + cur_distortion_psnr->min_all, + cur_distortion_psnr->min_frame); + } + if (do_ssim) { + printf("\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + cur_distortion_ssim->min_y, + cur_distortion_ssim->min_u, + cur_distortion_ssim->min_v, + cur_distortion_ssim->min_all, + cur_distortion_ssim->min_frame); + } + if (show_name) { + printf("\t%s", argv[fileindex_rec+cur_rec]); + } + printf("\n"); + } + + if (do_mse) { + double global_mse_y = GetMSE(cur_distortion_psnr->global_y, + static_cast(y_size) * number_of_frames); + double global_mse_u = GetMSE(cur_distortion_psnr->global_u, + static_cast(uv_size) * number_of_frames); + double global_mse_v = GetMSE(cur_distortion_psnr->global_v, + static_cast(uv_size) * number_of_frames); + double global_mse_all = GetMSE(cur_distortion_psnr->global_all, + static_cast(total_size) * number_of_frames); + printf("MSE:\t%10.6f\t%10.6f\t%10.6f\t%10.6f\t%5d", + global_mse_y, + global_mse_u, + global_mse_v, + global_mse_all, + number_of_frames); + if (show_name) { + printf("\t%s", argv[fileindex_rec+cur_rec]); + } + printf("\n"); + } + } + fclose(file_org); + for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { + fclose(file_rec[cur_rec]); + } + delete[] distortion_psnr; + delete[] distortion_ssim; + delete[] ch_org; + delete[] ch_rec; + delete[] file_rec; + return 0; +} diff --git a/util/ssim.cc b/util/ssim.cc new file mode 100644 index 000000000..9ebbdab43 --- /dev/null +++ b/util/ssim.cc @@ -0,0 +1,327 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./ssim.h" + +#include +#include + +typedef unsigned int uint32; // NOLINT +typedef unsigned short uint16; // NOLINT + +#if defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) +#define __SSE2__ +#endif +#ifdef __SSE2__ +#include +#endif + +#ifdef _OPENMP +#include +#endif + +// SSIM +enum { KERNEL = 3, KERNEL_SIZE = 2 * KERNEL + 1 }; + +// Symmetric Gaussian kernel: K[i] = ~11 * exp(-0.3 * i * i) +// The maximum value (11 x 11) must be less than 128 to avoid sign +// problems during the calls to _mm_mullo_epi16(). +static const int K[KERNEL_SIZE] = { + 1, 3, 7, 11, 7, 3, 1 // ~11 * exp(-0.3 * i * i) +}; +static const double kiW[KERNEL + 1 + 1] = { + 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] + 1. / 1089., // 1 / sum(i:0..6, j..6) K[i]*K[j] + 1. / 1056., // 1 / sum(i:0..5, j..6) K[i]*K[j] + 1. / 957., // 1 / sum(i:0..4, j..6) K[i]*K[j] + 1. / 726., // 1 / sum(i:0..3, j..6) K[i]*K[j] +}; + +#ifdef __SSE2__ + +#define PWEIGHT(A, B) static_cast(K[(A)] * K[(B)]) // weight product +#define MAKE_WEIGHT(L) \ + { { { PWEIGHT(L, 0), PWEIGHT(L, 1), PWEIGHT(L, 2), PWEIGHT(L, 3), \ + PWEIGHT(L, 4), PWEIGHT(L, 5), PWEIGHT(L, 6), 0 } } } + +// We need this union trick to be able to initialize constant static __m128i +// values. We can't call _mm_set_epi16() for static compile-time initialization. +static const struct { + union { + uint16 i16_[8]; + __m128i m_; + } values_; +} W0 = MAKE_WEIGHT(0), + W1 = MAKE_WEIGHT(1), + W2 = MAKE_WEIGHT(2), + W3 = MAKE_WEIGHT(3); + // ... the rest is symmetric. +#undef MAKE_WEIGHT +#undef PWEIGHT +#endif + +// Common final expression for SSIM, once the weighted sums are known. +static double FinalizeSSIM(double iw, double xm, double ym, + double xxm, double xym, double yym) { + const double iwx = xm * iw; + const double iwy = ym * iw; + double sxx = xxm * iw - iwx * iwx; + double syy = yym * iw - iwy * iwy; + // small errors are possible, due to rounding. Clamp to zero. + if (sxx < 0.) sxx = 0.; + if (syy < 0.) syy = 0.; + const double sxsy = sqrt(sxx * syy); + const double sxy = xym * iw - iwx * iwy; + static const double C11 = (0.01 * 0.01) * (255 * 255); + static const double C22 = (0.03 * 0.03) * (255 * 255); + static const double C33 = (0.015 * 0.015) * (255 * 255); + const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11); + const double c = (2. * sxsy + C22) / (sxx + syy + C22); + const double s = (sxy + C33) / (sxsy + C33); + return l * c * s; +} + +// GetSSIM() does clipping. GetSSIMFullKernel() does not + +// TODO(skal): use summed tables? +// Note: worst case of accumulation is a weight of 33 = 11 + 2 * (7 + 3 + 1) +// with a diff of 255, squared. The maximum error is thus 0x4388241, +// which fits into 32 bits integers. +double GetSSIM(const uint8 *org, const uint8 *rec, + int xo, int yo, int W, int H, int stride) { + uint32 ws = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; + org += (yo - KERNEL) * stride; + org += (xo - KERNEL); + rec += (yo - KERNEL) * stride; + rec += (xo - KERNEL); + for (int y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride, rec += stride) { + if (((yo - KERNEL + y_) < 0) || ((yo - KERNEL + y_) >= H)) continue; + const int Wy = K[y_]; + for (int x_ = 0; x_ < KERNEL_SIZE; ++x_) { + const int Wxy = Wy * K[x_]; + if (((xo - KERNEL + x_) >= 0) && ((xo - KERNEL + x_) < W)) { + const int org_x = org[x_]; + const int rec_x = rec[x_]; + ws += Wxy; + xm += Wxy * org_x; + ym += Wxy * rec_x; + xxm += Wxy * org_x * org_x; + xym += Wxy * org_x * rec_x; + yym += Wxy * rec_x * rec_x; + } + } + } + return FinalizeSSIM(1. / ws, xm, ym, xxm, xym, yym); +} + +double GetSSIMFullKernel(const uint8 *org, const uint8 *rec, + int xo, int yo, int stride, + double area_weight) { + uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0; + +#ifndef __SSE2__ + + org += yo * stride + xo; + rec += yo * stride + xo; + for (int y = 1; y <= KERNEL; y++) { + const int dy1 = y * stride; + const int dy2 = y * stride; + const int Wy = K[KERNEL + y]; + + for (int x = 1; x <= KERNEL; x++) { + // Compute the contributions of upper-left (ul), upper-right (ur) + // lower-left (ll) and lower-right (lr) points (see the diagram below). + // Symmetric Kernel will have same weight on those points. + // - - - - - - - + // - ul - - - ur - + // - - - - - - - + // - - - 0 - - - + // - - - - - - - + // - ll - - - lr - + // - - - - - - - + const int Wxy = Wy * K[KERNEL + x]; + const int ul1 = org[-dy1 - x]; + const int ur1 = org[-dy1 + x]; + const int ll1 = org[dy1 - x]; + const int lr1 = org[dy1 + x]; + + const int ul2 = rec[-dy2 - x]; + const int ur2 = rec[-dy2 + x]; + const int ll2 = rec[dy2 - x]; + const int lr2 = rec[dy2 + x]; + + xm += Wxy * (ul1 + ur1 + ll1 + lr1); + ym += Wxy * (ul2 + ur2 + ll2 + lr2); + xxm += Wxy * (ul1 * ul1 + ur1 * ur1 + ll1 * ll1 + lr1 * lr1); + xym += Wxy * (ul1 * ul2 + ur1 * ur2 + ll1 * ll2 + lr1 * lr2); + yym += Wxy * (ul2 * ul2 + ur2 * ur2 + ll2 * ll2 + lr2 * lr2); + } + + // Compute the contributions of up (u), down (d), left (l) and right (r) + // points across the main axes (see the diagram below). + // Symmetric Kernel will have same weight on those points. + // - - - - - - - + // - - - u - - - + // - - - - - - - + // - l - 0 - r - + // - - - - - - - + // - - - d - - - + // - - - - - - - + const int Wxy = Wy * K[KERNEL]; + const int u1 = org[-dy1]; + const int d1 = org[dy1]; + const int l1 = org[-y]; + const int r1 = org[y]; + + const int u2 = rec[-dy2]; + const int d2 = rec[dy2]; + const int l2 = rec[-y]; + const int r2 = rec[y]; + + xm += Wxy * (u1 + d1 + l1 + r1); + ym += Wxy * (u2 + d2 + l2 + r2); + xxm += Wxy * (u1 * u1 + d1 * d1 + l1 * l1 + r1 * r1); + xym += Wxy * (u1 * u2 + d1 * d2 + l1 * l2 + r1 * r2); + yym += Wxy * (u2 * u2 + d2 * d2 + l2 * l2 + r2 * r2); + } + + // Lastly the contribution of (x0, y0) point. + const int Wxy = K[KERNEL] * K[KERNEL]; + const int s1 = org[0]; + const int s2 = rec[0]; + + xm += Wxy * s1; + ym += Wxy * s2; + xxm += Wxy * s1 * s1; + xym += Wxy * s1 * s2; + yym += Wxy * s2 * s2; + +#else // __SSE2__ + + org += (yo - KERNEL) * stride + (xo - KERNEL); + rec += (yo - KERNEL) * stride + (xo - KERNEL); + + const __m128i zero = _mm_setzero_si128(); + __m128i x = zero; + __m128i y = zero; + __m128i xx = zero; + __m128i xy = zero; + __m128i yy = zero; + +// Read 8 pixels at line #L, and convert to 16bit, perform weighting +// and acccumulate. +#define LOAD_LINE_PAIR(L, WEIGHT) do { \ + const __m128i v0 = \ + _mm_loadl_epi64(reinterpret_cast(org + (L) * stride)); \ + const __m128i v1 = \ + _mm_loadl_epi64(reinterpret_cast(rec + (L) * stride)); \ + const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ + const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ + const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ + const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ + x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ + y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ + x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ + y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ + xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ + xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ + yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ +} while (0) + +#define ADD_AND_STORE_FOUR_EPI32(M, OUT) do { \ + uint32 tmp[4]; \ + _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \ + (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0]; \ +} while (0) + + LOAD_LINE_PAIR(0, W0); + LOAD_LINE_PAIR(1, W1); + LOAD_LINE_PAIR(2, W2); + LOAD_LINE_PAIR(3, W3); + LOAD_LINE_PAIR(4, W2); + LOAD_LINE_PAIR(5, W1); + LOAD_LINE_PAIR(6, W0); + + ADD_AND_STORE_FOUR_EPI32(x, xm); + ADD_AND_STORE_FOUR_EPI32(y, ym); + ADD_AND_STORE_FOUR_EPI32(xx, xxm); + ADD_AND_STORE_FOUR_EPI32(xy, xym); + ADD_AND_STORE_FOUR_EPI32(yy, yym); + +#undef LOAD_LINE_PAIR +#undef ADD_AND_STORE_FOUR_EPI32 +#endif + + return FinalizeSSIM(area_weight, xm, ym, xxm, xym, yym); +} + +static int start_max(int x, int y) { return (x > y) ? x : y; } + +double CalcSSIM(const uint8 *org, const uint8 *rec, + const int image_width, const int image_height) { + double SSIM = 0.; + const int KERNEL_Y = (image_height < KERNEL) ? image_height : KERNEL; + const int KERNEL_X = (image_width < KERNEL) ? image_width : KERNEL; + const int start_x = start_max(image_width - 8 + KERNEL_X, KERNEL_X); + const int start_y = start_max(image_height - KERNEL_Y, KERNEL_Y); + const int stride = image_width; + + for (int j = 0; j < KERNEL_Y; ++j) { + for (int i = 0; i < image_width; ++i) { + SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); + } + } + +#ifdef _OPENMP + #pragma omp parallel for reduction(+: SSIM) +#endif + for (int j = KERNEL_Y; j < image_height - KERNEL_Y; ++j) { + for (int i = 0; i < KERNEL_X; ++i) { + SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); + } + for (int i = KERNEL_X; i < start_x; ++i) { + SSIM += GetSSIMFullKernel(org, rec, i, j, stride, kiW[0]); + } + if (start_x < image_width) { + // GetSSIMFullKernel() needs to be able to read 8 pixels (in SSE2). So we + // copy the 8 rightmost pixels on a cache area, and pad this area with + // zeros which won't contribute to the overall SSIM value (but we need + // to pass the correct normalizing constant!). By using this cache, we can + // still call GetSSIMFullKernel() instead of the slower GetSSIM(). + // NOTE: we could use similar method for the left-most pixels too. + const int kScratchWidth = 8; + const int kScratchStride = kScratchWidth + KERNEL + 1; + uint8 scratch_org[KERNEL_SIZE * kScratchStride] = { 0 }; + uint8 scratch_rec[KERNEL_SIZE * kScratchStride] = { 0 }; + + for (int k = 0; k < KERNEL_SIZE; ++k) { + const int offset = + (j - KERNEL + k) * stride + image_width - kScratchWidth; + memcpy(scratch_org + k * kScratchStride, org + offset, kScratchWidth); + memcpy(scratch_rec + k * kScratchStride, rec + offset, kScratchWidth); + } + for (int k = 0; k <= KERNEL_X + 1; ++k) { + SSIM += GetSSIMFullKernel(scratch_org, scratch_rec, + KERNEL + k, KERNEL, kScratchStride, kiW[k]); + } + } + } + + for (int j = start_y; j < image_height; ++j) { + for (int i = 0; i < image_width; ++i) { + SSIM += GetSSIM(org, rec, i, j, image_width, image_height, stride); + } + } + return SSIM; +} + +double CalcLSSIM(double ssim) { + return -10.0 * log10(1.0 - ssim); +} diff --git a/util/ssim.h b/util/ssim.h new file mode 100644 index 000000000..baec20c4f --- /dev/null +++ b/util/ssim.h @@ -0,0 +1,24 @@ +/* + * Copyright 2013 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Get SSIM for video sequence. Assuming RAW 4:2:0 Y:Cb:Cr format + +#ifndef UTIL_SSIM_H_ +#define UTIL_SSIM_H_ + +typedef unsigned char uint8; + +double CalcSSIM(const uint8* org, const uint8* rec, + const int image_width, const int image_height); + +// does -10.0 * log10(1.0 - ssim) +double CalcLSSIM(double ssim); + +#endif // UTIL_SSIM_H_