LCOV - code coverage report
Current view: top level - tests/checkasm - checkasm.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 177 270 65.6 %
Date: 2018-05-20 11:54:08 Functions: 21 31 67.7 %

          Line data    Source code
       1             : /*
       2             :  * Assembly testing and benchmarking tool
       3             :  * Copyright (c) 2015 Henrik Gramner
       4             :  * Copyright (c) 2008 Loren Merritt
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or modify
       9             :  * it under the terms of the GNU General Public License as published by
      10             :  * the Free Software Foundation; either version 2 of the License, or
      11             :  * (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16             :  * GNU General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU General Public License along
      19             :  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
      20             :  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
      21             :  */
      22             : 
      23             : #include "config.h"
      24             : 
      25             : #if CONFIG_LINUX_PERF
      26             : # ifndef _GNU_SOURCE
      27             : #  define _GNU_SOURCE // for syscall (performance monitoring API)
      28             : # endif
      29             : #endif
      30             : 
      31             : #include <stdarg.h>
      32             : #include <stdio.h>
      33             : #include <stdlib.h>
      34             : #include <string.h>
      35             : #include "checkasm.h"
      36             : #include "libavutil/common.h"
      37             : #include "libavutil/cpu.h"
      38             : #include "libavutil/intfloat.h"
      39             : #include "libavutil/random_seed.h"
      40             : 
      41             : #if HAVE_IO_H
      42             : #include <io.h>
      43             : #endif
      44             : 
      45             : #if HAVE_SETCONSOLETEXTATTRIBUTE
      46             : #include <windows.h>
      47             : #define COLOR_RED    FOREGROUND_RED
      48             : #define COLOR_GREEN  FOREGROUND_GREEN
      49             : #define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
      50             : #else
      51             : #define COLOR_RED    1
      52             : #define COLOR_GREEN  2
      53             : #define COLOR_YELLOW 3
      54             : #endif
      55             : 
      56             : #if HAVE_UNISTD_H
      57             : #include <unistd.h>
      58             : #endif
      59             : 
      60             : #if !HAVE_ISATTY
      61             : #define isatty(fd) 1
      62             : #endif
      63             : 
      64             : #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
      65             : #include "libavutil/arm/cpu.h"
      66             : 
                       /* Call-wrapper function pointer, initialised to the non-VFP variant.
                        * main() repoints it to checkasm_checked_call_vfp when have_vfp() or
                        * have_neon() reports support for the host cpu flags. */
      67             : void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
      68             : #endif
      69             : 
      70             : /* List of tests to invoke.
                        *
                        * Each entry pairs the name matched against the --test= argument with
                        * the entry point that check_cpu_flag() calls. Entries are compiled in
                        * only when the corresponding CONFIG_* symbol is enabled. The table is
                        * terminated by an entry whose func is NULL; check_cpu_flag() stops at
                        * it and main() treats a NULL tests[0].func as "no tests to perform". */
      71             : static const struct {
      72             :     const char *name;
      73             :     void (*func)(void);
      74             : } tests[] = {
      75             : #if CONFIG_AVCODEC
      76             :     #if CONFIG_AAC_DECODER
      77             :         { "aacpsdsp", checkasm_check_aacpsdsp },
      78             :         { "sbrdsp",   checkasm_check_sbrdsp },
      79             :     #endif
      80             :     #if CONFIG_ALAC_DECODER
      81             :         { "alacdsp", checkasm_check_alacdsp },
      82             :     #endif
      83             :     #if CONFIG_AUDIODSP
      84             :         { "audiodsp", checkasm_check_audiodsp },
      85             :     #endif
      86             :     #if CONFIG_BLOCKDSP
      87             :         { "blockdsp", checkasm_check_blockdsp },
      88             :     #endif
      89             :     #if CONFIG_BSWAPDSP
      90             :         { "bswapdsp", checkasm_check_bswapdsp },
      91             :     #endif
      92             :     #if CONFIG_DCA_DECODER
      93             :         { "synth_filter", checkasm_check_synth_filter },
      94             :     #endif
      95             :     #if CONFIG_EXR_DECODER
      96             :         { "exrdsp", checkasm_check_exrdsp },
      97             :     #endif
      98             :     #if CONFIG_FLACDSP
      99             :         { "flacdsp", checkasm_check_flacdsp },
     100             :     #endif
     101             :     #if CONFIG_FMTCONVERT
     102             :         { "fmtconvert", checkasm_check_fmtconvert },
     103             :     #endif
     104             :     #if CONFIG_G722DSP
     105             :         { "g722dsp", checkasm_check_g722dsp },
     106             :     #endif
     107             :     #if CONFIG_H264DSP
     108             :         { "h264dsp", checkasm_check_h264dsp },
     109             :     #endif
     110             :     #if CONFIG_H264PRED
     111             :         { "h264pred", checkasm_check_h264pred },
     112             :     #endif
     113             :     #if CONFIG_H264QPEL
     114             :         { "h264qpel", checkasm_check_h264qpel },
     115             :     #endif
     116             :     #if CONFIG_HEVC_DECODER
     117             :         { "hevc_add_res", checkasm_check_hevc_add_res },
     118             :         { "hevc_idct", checkasm_check_hevc_idct },
     119             :         { "hevc_sao", checkasm_check_hevc_sao },
     120             :     #endif
     121             :     #if CONFIG_HUFFYUV_DECODER
     122             :         { "huffyuvdsp", checkasm_check_huffyuvdsp },
     123             :     #endif
     124             :     #if CONFIG_JPEG2000_DECODER
     125             :         { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
     126             :     #endif
                           /* NOTE(review): llviddsp is gated on CONFIG_HUFFYUVDSP, unlike
                            * llviddspenc below which uses a symbol named after itself --
                            * confirm this guard is the intended one. */
     127             :     #if CONFIG_HUFFYUVDSP
     128             :         { "llviddsp", checkasm_check_llviddsp },
     129             :     #endif
     130             :     #if CONFIG_LLVIDENCDSP
     131             :         { "llviddspenc", checkasm_check_llviddspenc },
     132             :     #endif
     133             :     #if CONFIG_PIXBLOCKDSP
     134             :         { "pixblockdsp", checkasm_check_pixblockdsp },
     135             :     #endif
     136             :     #if CONFIG_UTVIDEO_DECODER
     137             :         { "utvideodsp", checkasm_check_utvideodsp },
     138             :     #endif
     139             :     #if CONFIG_V210_ENCODER
     140             :         { "v210enc", checkasm_check_v210enc },
     141             :     #endif
     142             :     #if CONFIG_VP8DSP
     143             :         { "vp8dsp", checkasm_check_vp8dsp },
     144             :     #endif
     145             :     #if CONFIG_VP9_DECODER
     146             :         { "vp9dsp", checkasm_check_vp9dsp },
     147             :     #endif
     148             :     #if CONFIG_VIDEODSP
     149             :         { "videodsp", checkasm_check_videodsp },
     150             :     #endif
     151             : #endif
     152             : #if CONFIG_AVFILTER
     153             :     #if CONFIG_BLEND_FILTER
     154             :         { "vf_blend", checkasm_check_blend },
     155             :     #endif
     156             :     #if CONFIG_COLORSPACE_FILTER
     157             :         { "vf_colorspace", checkasm_check_colorspace },
     158             :     #endif
     159             :     #if CONFIG_HFLIP_FILTER
     160             :         { "vf_hflip", checkasm_check_vf_hflip },
     161             :     #endif
     162             :     #if CONFIG_NLMEANS_FILTER
     163             :         { "vf_nlmeans", checkasm_check_nlmeans },
     164             :     #endif
     165             :     #if CONFIG_THRESHOLD_FILTER
     166             :         { "vf_threshold", checkasm_check_vf_threshold },
     167             :     #endif
     168             : #endif
     169             : #if CONFIG_SWSCALE
     170             :     { "sw_rgb", checkasm_check_sw_rgb },
     171             : #endif
     172             : #if CONFIG_AVUTIL
     173             :         { "fixed_dsp", checkasm_check_fixed_dsp },
     174             :         { "float_dsp", checkasm_check_float_dsp },
     175             : #endif
     176             :     { NULL }
     177             : };
     178             : 
     179             : /* List of cpu flags to check.
                        *
                        * name is the label stored for display, suffix is the string that
                        * cpu_suffix() returns for benchmark output, and flag is the
                        * AV_CPU_FLAG_* mask for the entry (some x86 entries OR in a companion
                        * flag). Order matters: cpu_suffix() scans from the last entry towards
                        * the first and returns the first match, so later entries take
                        * precedence. The terminating { NULL } entry has flag 0; main() treats
                        * a zero cpus[0].flag as "no tests to perform". */
     180             : static const struct {
     181             :     const char *name;
     182             :     const char *suffix;
     183             :     int flag;
     184             : } cpus[] = {
     185             : #if   ARCH_AARCH64
     186             :     { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
     187             :     { "NEON",     "neon",     AV_CPU_FLAG_NEON },
     188             : #elif ARCH_ARM
     189             :     { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
     190             :     { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
     191             :     { "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
     192             :     { "VFP",      "vfp",      AV_CPU_FLAG_VFP },
     193             :     { "VFP_VM",   "vfp_vm",   AV_CPU_FLAG_VFP_VM },
     194             :     { "VFPV3",    "vfp3",     AV_CPU_FLAG_VFPV3 },
     195             :     { "NEON",     "neon",     AV_CPU_FLAG_NEON },
     196             : #elif ARCH_PPC
     197             :     { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
     198             :     { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
     199             :     { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
     200             : #elif ARCH_X86
     201             :     { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
     202             :     { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
     203             :     { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
     204             :     { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
     205             :     { "SSE",      "sse",      AV_CPU_FLAG_SSE },
     206             :     { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
     207             :     { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
     208             :     { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
     209             :     { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
     210             :     { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
     211             :     { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
     212             :     { "AVX",      "avx",      AV_CPU_FLAG_AVX },
     213             :     { "XOP",      "xop",      AV_CPU_FLAG_XOP },
     214             :     { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
     215             :     { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
     216             :     { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
     217             :     { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
     218             : #endif
     219             :     { NULL }
     220             : };
     221             : 
                       /* One implementation ("version") of a tested function. Versions of the
                        * same function form a singly linked list through next; the first one
                        * is embedded in CheckasmFunc and the rest are heap nodes that
                        * destroy_func_tree() frees. */
     222             : typedef struct CheckasmFuncVersion {
     223             :     struct CheckasmFuncVersion *next; /* next version, or NULL at the end of the list */
     224             :     void *func; /* NULL on a freshly created node (get_func() relies on this) */
     225             :     int ok;
     226             :     int cpu; /* cpu flag mask; print_benchs() maps it to a suffix via cpu_suffix() */
     227             :     CheckasmPerf perf; /* benchmark counters (cycles, iterations) read by print_benchs() */
     228             : } CheckasmFuncVersion;
     229             : 
     230             : /* Binary search tree node, keyed by name (ordering: cmp_func_names()).
                        * child[0] holds names that compare lower and child[1] names that
                        * compare higher, as selected by get_func(). */
     231             : typedef struct CheckasmFunc {
     232             :     struct CheckasmFunc *child[2];
     233             :     CheckasmFuncVersion versions; /* head of the version list, stored inline */
     234             :     uint8_t color; /* 0 = red, 1 = black */
                           /* Variable-length name: get_func() allocates
                            * sizeof(CheckasmFunc) + strlen(name) bytes, so the single
                            * declared byte is the room for the NUL terminator. */
     235             :     char name[1];
     236             : } CheckasmFunc;
     237             : 
     238             : /* Internal state. Has static storage duration, so every field starts
                        * out zero/NULL. */
     239             : static struct {
     240             :     CheckasmFunc *funcs; /* root of the function tree (see get_func, destroy_func_tree) */
     241             :     CheckasmFunc *current_func;
     242             :     CheckasmFuncVersion *current_func_ver;
     243             :     const char *current_test_name; /* set by check_cpu_flag() before each test runs */
     244             :     const char *bench_pattern; /* text after "--bench=", or "" for plain --bench */
     245             :     int bench_pattern_len;
     246             :     int num_checked;
     247             :     int num_failed;
     248             : 
     249             :     /* perf */
     250             :     int nop_time; /* timer overhead from measure_nop_time(), in decicycles */
     251             :     int sysfd; /* perf event fd opened by bench_init_linux() */
     252             : 
     253             :     int cpu_flag; /* cpu flags currently forced by check_cpu_flag() */
     254             :     const char *cpu_flag_name; /* pending label; print_cpu_name() prints it once and clears it */
     255             :     const char *test_name; /* text after "--test=", or NULL to run every test */
     256             : } state;
     257             : 
     258             : /* PRNG state. Defined without static, so it has external linkage.
                        * NOTE(review): presumably shared with the individual test files through
                        * checkasm.h -- confirm there. */
     259             : AVLFG checkasm_lfg;
     260             : 
     261             : /* float compare support code */
                       /* Return u.i >> 31, i.e. nonzero when the top bit of the integer view of
                        * the float is set.
                        * NOTE(review): this is the sign bit provided u.i is a 32-bit unsigned
                        * field; union av_intfloat32 is declared in libavutil/intfloat.h, which
                        * is not visible here. */
     262    10889664 : static int is_negative(union av_intfloat32 u)
     263             : {
     264    10889664 :     return u.i >> 31;
     265             : }
     266             : 
                       /* Compare two floats by the distance between their raw integer
                        * representations.
                        *
                        * Returns 1 when a and b have the same sign and their integer views
                        * differ by at most max_ulp, 0 otherwise. When the signs differ the
                        * result is simply a == b. */
     267     5444832 : int float_near_ulp(float a, float b, unsigned max_ulp)
     268             : {
     269             :     union av_intfloat32 x, y;
     270             : 
     271     5444832 :     x.f = a;
     272     5444832 :     y.f = b;
     273             : 
     274     5444832 :     if (is_negative(x) != is_negative(y)) {
     275             :         // handle -0.0 == +0.0
     276           0 :         return a == b;
     277             :     }
     278             : 
                           /* Widen to 64 bits before subtracting so the difference of the two
                            * integer views cannot wrap. */
     279     5444832 :     if (llabs((int64_t)x.i - y.i) <= max_ulp)
     280     5444824 :         return 1;
     281             : 
     282           8 :     return 0;
     283             : }
     284             : 
                       /* Element-wise float_near_ulp() over the first len entries of a and b.
                        * Returns 0 at the first pair that fails, 1 if every pair passes
                        * (including when len is 0). */
     285        6868 : int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
     286             :                          unsigned len)
     287             : {
     288             :     unsigned i;
     289             : 
     290     5446580 :     for (i = 0; i < len; i++) {
     291     5439712 :         if (!float_near_ulp(a[i], b[i], max_ulp))
     292           0 :             return 0;
     293             :     }
     294        6868 :     return 1;
     295             : }
     296             : 
                       /* Return 1 when |a - b| is strictly less than eps. Otherwise print a
                        * diagnostic with both values, the difference and eps to stderr and
                        * return 0. Because the comparison is strict, eps == 0 rejects even
                        * identical inputs. */
     297      153008 : int float_near_abs_eps(float a, float b, float eps)
     298             : {
     299      153008 :     float abs_diff = fabsf(a - b);
     300      153008 :     if (abs_diff < eps)
     301      153008 :         return 1;
     302             : 
     303           0 :     fprintf(stderr, "test failed comparing %g with %g (abs diff=%g with EPS=%g)\n", a, b, abs_diff, eps);
     304             : 
     305           0 :     return 0;
     306             : }
     307             : 
                       /* Element-wise float_near_abs_eps() over the first len entries of a and
                        * b. Stops and returns 0 at the first failing pair (which has already
                        * been reported on stderr by float_near_abs_eps()); returns 1 if all
                        * pairs pass. */
     308         124 : int float_near_abs_eps_array(const float *a, const float *b, float eps,
     309             :                          unsigned len)
     310             : {
     311             :     unsigned i;
     312             : 
     313      147232 :     for (i = 0; i < len; i++) {
     314      147108 :         if (!float_near_abs_eps(a[i], b[i], eps))
     315           0 :             return 0;
     316             :     }
     317         124 :     return 1;
     318             : }
     319             : 
                       /* Accept the pair if either the ULP test or the absolute-epsilon test
                        * passes. The ULP test runs first and short-circuits, so the stderr
                        * diagnostic from float_near_abs_eps() appears only when both fail. */
     320        5120 : int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
     321             : {
     322        5120 :     return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps);
     323             : }
     324             : 
                       /* Element-wise float_near_abs_eps_ulp() over the first len entries of a
                        * and b. Returns 0 at the first failing pair, 1 if all pairs pass. */
     325           0 : int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
     326             :                          unsigned max_ulp, unsigned len)
     327             : {
     328             :     unsigned i;
     329             : 
     330           0 :     for (i = 0; i < len; i++) {
     331           0 :         if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
     332           0 :             return 0;
     333             :     }
     334           0 :     return 1;
     335             : }
     336             : 
                       /* Return nonzero when |a - b| is strictly less than eps. Unlike
                        * float_near_abs_eps(), nothing is printed on failure. */
     337        1792 : int double_near_abs_eps(double a, double b, double eps)
     338             : {
     339        1792 :     double abs_diff = fabs(a - b);
     340             : 
     341        1792 :     return abs_diff < eps;
     342             : }
     343             : 
                       /* Element-wise double_near_abs_eps() over the first len entries of a and
                        * b. Returns 0 at the first failing pair, 1 if all pairs pass. */
     344           0 : int double_near_abs_eps_array(const double *a, const double *b, double eps,
     345             :                               unsigned len)
     346             : {
     347             :     unsigned i;
     348             : 
     349           0 :     for (i = 0; i < len; i++) {
     350           0 :         if (!double_near_abs_eps(a[i], b[i], eps))
     351           0 :             return 0;
     352             :     }
     353           0 :     return 1;
     354             : }
     355             : 
     356             : /* Print colored text to stderr if the terminal supports it.
                        *
                        * color is one of the COLOR_* values; fmt and the variadic arguments
                        * are passed to vfprintf(). Whether color is usable is worked out on the
                        * first call and cached in a function-local static. When color is
                        * unavailable the text is printed unchanged. */
     357         310 : static void color_printf(int color, const char *fmt, ...)
     358             : {
     359             :     static int use_color = -1;
     360             :     va_list arg;
     361             : 
     362             : #if HAVE_SETCONSOLETEXTATTRIBUTE
     363             :     static HANDLE con;
     364             :     static WORD org_attributes;
     365             : 
                           /* First call: color is usable if the stderr handle is valid and its
                            * screen buffer info can be read; keep the original attributes so
                            * they can be restored after printing. */
     366             :     if (use_color < 0) {
     367             :         CONSOLE_SCREEN_BUFFER_INFO con_info;
     368             :         con = GetStdHandle(STD_ERROR_HANDLE);
     369             :         if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
     370             :             org_attributes = con_info.wAttributes;
     371             :             use_color = 1;
     372             :         } else
     373             :             use_color = 0;
     374             :     }
     375             :     if (use_color)
     376             :         SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
     377             : #else
                           /* First call: color is usable if TERM is set, is not "dumb", and
                            * file descriptor 2 is a tty. */
     378         310 :     if (use_color < 0) {
     379          32 :         const char *term = getenv("TERM");
     380          32 :         use_color = term && strcmp(term, "dumb") && isatty(2);
     381             :     }
                           /* Escape sequence: bit 3 of color becomes the first parameter and
                            * the low three bits select the 3x foreground color. */
     382         310 :     if (use_color)
     383           0 :         fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
     384             : #endif
     385             : 
     386         310 :     va_start(arg, fmt);
     387         310 :     vfprintf(stderr, fmt, arg);
     388         310 :     va_end(arg);
     389             : 
                           /* Restore the terminal's previous attributes. */
     390         310 :     if (use_color) {
     391             : #if HAVE_SETCONSOLETEXTATTRIBUTE
     392             :         SetConsoleTextAttribute(con, org_attributes);
     393             : #else
     394           0 :         fprintf(stderr, "\x1b[0m");
     395             : #endif
     396             :     }
     397         310 : }
     398             : 
     399             : /* Deallocate a tree.
                        *
                        * Recursively frees the subtree rooted at f: first the heap-allocated
                        * version nodes chained from f->versions.next (the head version is
                        * embedded in the node and goes away with it), then both children, then
                        * f itself. A NULL f is a no-op. */
     400        3122 : static void destroy_func_tree(CheckasmFunc *f)
     401             : {
     402        3122 :     if (f) {
     403        1545 :         CheckasmFuncVersion *v = f->versions.next;
     404        5214 :         while (v) {
                                   /* Read the link before freeing the node that holds it. */
     405        2124 :             CheckasmFuncVersion *next = v->next;
     406        2124 :             free(v);
     407        2124 :             v = next;
     408             :         }
     409             : 
     410        1545 :         destroy_func_tree(f->child[0]);
     411        1545 :         destroy_func_tree(f->child[1]);
     412        1545 :         free(f);
     413             :     }
     414        3122 : }
     415             : 
     416             : /* Allocate a zero-initialized block, clean up and exit on failure.
                        *
                        * Returns a pointer to size bytes obtained from calloc(). It does not
                        * return on allocation failure: it prints a message to stderr, frees
                        * the function tree rooted at state.funcs and calls exit(1). */
     417        3669 : static void *checkasm_malloc(size_t size)
     418             : {
     419        3669 :     void *ptr = calloc(1, size);
     420        3669 :     if (!ptr) {
     421           0 :         fprintf(stderr, "checkasm: malloc failed\n");
     422           0 :         destroy_func_tree(state.funcs);
     423           0 :         exit(1);
     424             :     }
     425        3669 :     return ptr;
     426             : }
     427             : 
     428             : /* Get the suffix of the specified cpu flag.
                        *
                        * Scans cpus[] from the last entry to the first and returns the suffix
                        * of the first entry whose flag mask shares a bit with cpu, so entries
                        * listed later in the table win. The terminating entry has flag 0 and
                        * never matches. Returns "c" when nothing matches. */
     429           0 : static const char *cpu_suffix(int cpu)
     430             : {
     431           0 :     int i = FF_ARRAY_ELEMS(cpus);
     432             : 
     433           0 :     while (--i >= 0)
     434           0 :         if (cpu & cpus[i].flag)
     435           0 :             return cpus[i].suffix;
     436             : 
     437           0 :     return "c";
     438             : }
     439             : 
                       /* qsort() comparator for uint16_t samples, ascending: returns the
                        * difference of the two pointed-to values after promotion to int. */
     440           0 : static int cmp_nop(const void *a, const void *b)
     441             : {
     442           0 :     return *(const uint16_t*)a - *(const uint16_t*)b;
     443             : }
     444             : 
     445             : /* Measure the overhead of the timing code (in decicycles).
                        *
                        * Takes 10000 back-to-back PERF_START/PERF_STOP samples, sorts them and
                        * sums the middle 5000 (indices 2500..7499), discarding the lowest and
                        * highest quarters. Dividing that sum by 500 instead of 5000 gives ten
                        * times the mean, which is the decicycle unit. */
     446           0 : static int measure_nop_time(void)
     447             : {
     448             :     uint16_t nops[10000];
     449           0 :     int i, nop_sum = 0;
                           /* NOTE(review): sysfd is not referenced by name below; presumably
                            * the PERF_* macros use it in some configurations, hence av_unused
                            * -- confirm in checkasm.h. */
     450           0 :     av_unused const int sysfd = state.sysfd;
     451             : 
     452           0 :     uint64_t t = 0;
     453           0 :     for (i = 0; i < 10000; i++) {
     454           0 :         PERF_START(t);
     455           0 :         PERF_STOP(t);
                               /* Stored as uint16_t: only the low 16 bits of t are kept. */
     456           0 :         nops[i] = t;
     457             :     }
     458             : 
     459           0 :     qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
     460           0 :     for (i = 2500; i < 7500; i++)
     461           0 :         nop_sum += nops[i];
     462             : 
     463           0 :     return nop_sum / 500;
     464             : }
     465             : 
     466             : /* Print benchmark results.
                        *
                        * Walks the tree in order (child[0], the node, child[1]) and prints one
                        * "<name>_<suffix>: <cycles>.<tenth>" line to stdout for every version
                        * that recorded at least one iteration. */
     467           0 : static void print_benchs(CheckasmFunc *f)
     468             : {
     469           0 :     if (f) {
     470           0 :         print_benchs(f->child[0]);
     471             : 
     472             :         /* Only print functions with at least one assembly version */
     473           0 :         if (f->versions.cpu || f->versions.next) {
     474           0 :             CheckasmFuncVersion *v = &f->versions;
     475             :             do {
     476           0 :                 CheckasmPerf *p = &v->perf;
     477           0 :                 if (p->iterations) {
                                           /* Mean cost in decicycles with the timer overhead
                                            * subtracted. NOTE(review): the division by 4
                                            * presumably matches the number of calls timed per
                                            * iteration by the bench macro in checkasm.h --
                                            * confirm there. */
     478           0 :                     int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
     479           0 :                     printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
     480             :                 }
     481           0 :             } while ((v = v->next));
     482             :         }
     483             : 
     484           0 :         print_benchs(f->child[1]);
     485             :     }
     486           0 : }
     487             : 
     488             : /* ASCIIbetical sort except preserving natural order for numbers.
                        *
                        * Returns a negative, zero or positive value as a sorts before, equal
                        * to or after b. Names are compared byte-wise as unsigned chars, except
                        * that when they diverge inside a run of digits the name whose digit
                        * run is longer sorts later. */
     489      164687 : static int cmp_func_names(const char *a, const char *b)
     490             : {
     491      164687 :     const char *start = a;
     492             :     int ascii_diff, digit_diff;
     493             : 
                           /* Advance to the first differing byte (or the common terminator);
                            * ascii_diff keeps the byte difference found there. */
     494      164687 :     for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++);
                           /* Skip positions where both names still have a digit. */
     495      164687 :     for (; av_isdigit(*a) && av_isdigit(*b); a++, b++);
     496             : 
                           /* If the previous byte of a is a digit and only one name has a
                            * further digit here, the one with the extra digit sorts later. */
     497      164687 :     if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b)))
     498       29189 :         return digit_diff;
     499             : 
     500      135498 :     return ascii_diff;
     501             : }
     502             : 
     503             : /* Perform a tree rotation in the specified direction and return the new root.
                        *
                        * dir is 0 for a left rotation and 1 for a right rotation. The child on
                        * the opposite side (dir^1) is promoted to root of the subtree and f
                        * becomes its dir child. The new root takes over f's color and f is
                        * colored red (0). f->child[dir^1] must not be NULL. */
     504        1477 : static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir)
     505             : {
     506        1477 :     CheckasmFunc *r = f->child[dir^1];
     507        1477 :     f->child[dir^1] = r->child[dir];
     508        1477 :     r->child[dir] = f;
     509        1477 :     r->color = f->color;
     510        1477 :     f->color = 0;
     511        1477 :     return r;
     512             : }
     513             : 
                       /* True for a non-NULL node whose color is 0 (red); NULL counts as not
                        * red. The argument is evaluated twice. */
     514             : #define is_red(f) ((f) && !(f)->color)
     515             : 
     516             : /* Balance a left-leaning red-black tree at the specified node.
                        *
                        * *root must be non-NULL. It may be replaced by the node that a rotation
                        * promotes. */
     517       10715 : static void balance_tree(CheckasmFunc **root)
     518             : {
     519       10715 :     CheckasmFunc *f = *root;
     520             : 
                           /* Both children red: toggle this node's color and make both
                            * children black. */
     521       10715 :     if (is_red(f->child[0]) && is_red(f->child[1])) {
     522         916 :         f->color ^= 1;
     523         916 :         f->child[0]->color = f->child[1]->color = 1;
     524             :     }
     525             : 
                           /* A red link leaning right is rotated left; two consecutive red
                            * links on the left are rotated right. */
     526       10715 :     if (!is_red(f->child[0]) && is_red(f->child[1]))
     527        1054 :         *root = rotate_tree(f, 0); /* Rotate left */
     528        9661 :     else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
     529         423 :         *root = rotate_tree(f, 1); /* Rotate right */
     530       10715 : }
     531             : 
     532             : /* Get a node with the specified name, creating it if it doesn't exist.
                        *
                        * root is the link to the subtree to search; it is updated when a node
                        * is inserted there or when rebalancing changes the subtree's root.
                        * Never returns NULL: allocation failure exits inside
                        * checkasm_malloc(). */
     533      166232 : static CheckasmFunc *get_func(CheckasmFunc **root, const char *name)
     534             : {
     535      166232 :     CheckasmFunc *f = *root;
     536             : 
     537      166232 :     if (f) {
     538             :         /* Search the tree for a matching node */
     539      164687 :         int cmp = cmp_func_names(name, f->name);
     540      164687 :         if (cmp) {
                                   /* cmp > 0 selects child[1], otherwise child[0]. */
     541      143807 :             f = get_func(&f->child[cmp > 0], name);
     542             : 
     543             :             /* Rebalance the tree on the way up if a new node was inserted */
                                   /* A node whose versions.func is still NULL is treated as
                                    * the newly inserted one (new nodes are zero-initialized). */
     544      143807 :             if (!f->versions.func)
     545       10715 :                 balance_tree(root);
     546             :         }
     547             :     } else {
     548             :         /* Allocate and insert a new node into the tree */
     549        1545 :         int name_length = strlen(name);
                               /* name[1] in the struct already provides the terminator byte;
                                * the zeroed allocation also leaves the node red with NULL
                                * children. */
     550        1545 :         f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length);
     551        1545 :         memcpy(f->name, name, name_length + 1);
     552             :     }
     553             : 
     554      166232 :     return f;
     555             : }
     556             : 
     557             : /* Perform tests and benchmarks for the specified cpu flag if supported by the host.
                        *
                        * name is the label stored for print_cpu_name(); flag is the mask to add
                        * to the flags already enabled in state.cpu_flag. The tests run only
                        * when the combined mask is zero or when masking with the host's flags
                        * actually changed state.cpu_flag. If state.test_name is set, only the
                        * test with that exact name is run. */
     558         576 : static void check_cpu_flag(const char *name, int flag)
     559             : {
     560         576 :     int old_cpu_flag = state.cpu_flag;
     561             : 
     562         576 :     flag |= old_cpu_flag;
                           /* NOTE(review): av_force_cpu_flags(-1) presumably removes any forced
                            * mask so that av_get_cpu_flags() reports the real host flags --
                            * confirm in libavutil/cpu.h. */
     563         576 :     av_force_cpu_flags(-1);
     564         576 :     state.cpu_flag = flag & av_get_cpu_flags();
     565         576 :     av_force_cpu_flags(state.cpu_flag);
     566             : 
                           /* flag here already includes old_cpu_flag, so !flag is true only
                            * when no flag was requested and none was enabled before. */
     567         576 :     if (!flag || state.cpu_flag != old_cpu_flag) {
     568             :         int i;
     569             : 
     570         416 :         state.cpu_flag_name = name;
     571       14976 :         for (i = 0; tests[i].func; i++) {
     572       14560 :             if (state.test_name && strcmp(tests[i].name, state.test_name))
     573       14144 :                 continue;
     574         416 :             state.current_test_name = tests[i].name;
     575         416 :             tests[i].func();
     576             :         }
     577             :     }
     578         576 : }
     579             : 
     580             : /* Print the name of the current CPU flag, but only do it once.
                        *
                        * Prints state.cpu_flag_name followed by ":" in yellow via
                        * color_printf(), then clears it so later calls print nothing until
                        * check_cpu_flag() sets a new name. */
     581         201 : static void print_cpu_name(void)
     582             : {
     583         201 :     if (state.cpu_flag_name) {
     584         109 :         color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
     585         109 :         state.cpu_flag_name = NULL;
     586             :     }
     587         201 : }
     588             : 
     589             : #if CONFIG_LINUX_PERF
                       /* Open a hardware CPU-cycle counter through the perf_event_open syscall
                        * and store its file descriptor in state.sysfd.
                        *
                        * The counter is created disabled and excludes kernel and hypervisor
                        * time. Returns 0 on success; on failure reports the error with
                        * perror() and returns -1. */
     590             : static int bench_init_linux(void)
     591             : {
     592             :     struct perf_event_attr attr = {
     593             :         .type           = PERF_TYPE_HARDWARE,
     594             :         .size           = sizeof(struct perf_event_attr),
     595             :         .config         = PERF_COUNT_HW_CPU_CYCLES,
     596             :         .disabled       = 1, // start counting only on demand
     597             :         .exclude_kernel = 1,
     598             :         .exclude_hv     = 1,
     599             :     };
     600             : 
     601             :     printf("benchmarking with Linux Perf Monitoring API\n");
     602             : 
                           /* Arguments after &attr are pid, cpu, group_fd and flags. */
     603             :     state.sysfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
     604             :     if (state.sysfd == -1) {
     605             :         perror("syscall");
     606             :         return -1;
     607             :     }
     608             :     return 0;
     609             : }
     610             : #endif
     611             : 
                       /* Benchmark setup for builds without Linux perf support. When
                        * AV_READ_TIME is defined it prints a banner and returns 0; otherwise it
                        * reports on stderr that --bench is unsupported and returns -1. */
     612           0 : static int bench_init_ffmpeg(void)
     613             : {
     614             : #ifdef AV_READ_TIME
     615           0 :     printf("benchmarking with native FFmpeg timers\n");
     616           0 :     return 0;
     617             : #else
     618             :     fprintf(stderr, "checkasm: --bench is not supported on your system\n");
     619             :     return -1;
     620             : #endif
     621             : }
     622             : 
                       /* Set up benchmarking: initialise the timing backend chosen at compile
                        * time (Linux perf if CONFIG_LINUX_PERF, otherwise the FFmpeg timers),
                        * then measure the timer overhead, store it in state.nop_time and print
                        * it. Returns 0 on success or the backend's negative error code. */
     623           0 : static int bench_init(void)
     624             : {
     625             : #if CONFIG_LINUX_PERF
     626             :     int ret = bench_init_linux();
     627             : #else
     628           0 :     int ret = bench_init_ffmpeg();
     629             : #endif
     630           0 :     if (ret < 0)
     631           0 :         return ret;
     632             : 
     633           0 :     state.nop_time = measure_nop_time();
     634           0 :     printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
     635           0 :     return 0;
     636             : }
     637             : 
                       /* Release benchmarking resources. With CONFIG_LINUX_PERF it closes
                        * state.sysfd when that is greater than 0 (state is zero-initialized, so
                        * 0 also means "never opened"); otherwise it does nothing. */
     638          32 : static void bench_uninit(void)
     639             : {
     640             : #if CONFIG_LINUX_PERF
     641             :     if (state.sysfd > 0)
     642             :         close(state.sysfd);
     643             : #endif
     644          32 : }
     645             : 
     646          32 : int main(int argc, char *argv[])
     647             : {
     648          32 :     unsigned int seed = av_get_random_seed();
     649          32 :     int i, ret = 0;
     650             : 
     651             : #if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
     652             :     if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
     653             :         checkasm_checked_call = checkasm_checked_call_vfp;
     654             : #endif
     655             : 
     656          32 :     if (!tests[0].func || !cpus[0].flag) {
     657           0 :         fprintf(stderr, "checkasm: no tests to perform\n");
     658           0 :         return 0;
     659             :     }
     660             : 
     661          96 :     while (argc > 1) {
     662          32 :         if (!strncmp(argv[1], "--bench", 7)) {
     663           0 :             if (bench_init() < 0)
     664           0 :                 return 1;
     665           0 :             if (argv[1][7] == '=') {
     666           0 :                 state.bench_pattern = argv[1] + 8;
     667           0 :                 state.bench_pattern_len = strlen(state.bench_pattern);
     668             :             } else
     669           0 :                 state.bench_pattern = "";
     670          32 :         } else if (!strncmp(argv[1], "--test=", 7)) {
     671          32 :             state.test_name = argv[1] + 7;
     672             :         } else {
     673           0 :             seed = strtoul(argv[1], NULL, 10);
     674             :         }
     675             : 
     676          32 :         argc--;
     677          32 :         argv++;
     678             :     }
     679             : 
     680          32 :     fprintf(stderr, "checkasm: using random seed %u\n", seed);
     681          32 :     av_lfg_init(&checkasm_lfg, seed);
     682             : 
     683          32 :     check_cpu_flag(NULL, 0);
     684         576 :     for (i = 0; cpus[i].flag; i++)
     685         544 :         check_cpu_flag(cpus[i].name, cpus[i].flag);
     686             : 
     687          32 :     if (state.num_failed) {
     688           0 :         fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
     689           0 :         ret = 1;
     690             :     } else {
     691          32 :         fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
     692          32 :         if (state.bench_pattern) {
     693           0 :             print_benchs(state.funcs);
     694             :         }
     695             :     }
     696             : 
     697          32 :     destroy_func_tree(state.funcs);
     698          32 :     bench_uninit();
     699          32 :     return ret;
     700             : }
     701             : 
     702             : /* Decide whether or not the specified function needs to be tested and
     703             :  * allocate/initialize data structures if needed. Returns a pointer to a
     704             :  * reference function if the function should be tested, otherwise NULL */
     705       22425 : void *checkasm_check_func(void *func, const char *name, ...)
     706             : {
     707             :     char name_buf[256];
     708       22425 :     void *ref = func;
     709             :     CheckasmFuncVersion *v;
     710             :     int name_length;
     711             :     va_list arg;
     712             : 
     713       22425 :     va_start(arg, name);
     714       22425 :     name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
     715       22425 :     va_end(arg);
     716             : 
     717       22425 :     if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
     718           0 :         return NULL;
     719             : 
     720       22425 :     state.current_func = get_func(&state.funcs, name_buf);
     721       22425 :     state.funcs->color = 1;
     722       22425 :     v = &state.current_func->versions;
     723             : 
     724       22425 :     if (v->func) {
     725             :         CheckasmFuncVersion *prev;
     726             :         do {
     727             :             /* Only test functions that haven't already been tested */
     728       36418 :             if (v->func == func)
     729       18756 :                 return NULL;
     730             : 
     731       17662 :             if (v->ok)
     732       17662 :                 ref = v->func;
     733             : 
     734       17662 :             prev = v;
     735       17662 :         } while ((v = v->next));
     736             : 
     737        2124 :         v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
     738             :     }
     739             : 
     740        3669 :     v->func = func;
     741        3669 :     v->ok = 1;
     742        3669 :     v->cpu = state.cpu_flag;
     743        3669 :     state.current_func_ver = v;
     744             : 
     745        3669 :     if (state.cpu_flag)
     746        2124 :         state.num_checked++;
     747             : 
     748        3669 :     return ref;
     749             : }
     750             : 
     751             : /* Decide whether or not the current function needs to be benchmarked */
     752       11144 : int checkasm_bench_func(void)
     753             : {
     754       11144 :     return !state.num_failed && state.bench_pattern &&
     755           0 :            !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
     756             : }
     757             : 
     758             : /* Indicate that the current test has failed */
     759           0 : void checkasm_fail_func(const char *msg, ...)
     760             : {
     761           0 :     if (state.current_func_ver->cpu && state.current_func_ver->ok) {
     762             :         va_list arg;
     763             : 
     764           0 :         print_cpu_name();
     765           0 :         fprintf(stderr, "   %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
     766           0 :         va_start(arg, msg);
     767           0 :         vfprintf(stderr, msg, arg);
     768           0 :         va_end(arg);
     769           0 :         fprintf(stderr, ")\n");
     770             : 
     771           0 :         state.current_func_ver->ok = 0;
     772           0 :         state.num_failed++;
     773             :     }
     774           0 : }
     775             : 
     776             : /* Get the benchmark context of the current function */
     777           0 : CheckasmPerf *checkasm_get_perf_context(void)
     778             : {
     779           0 :     CheckasmPerf *perf = &state.current_func_ver->perf;
     780           0 :     memset(perf, 0, sizeof(*perf));
     781           0 :     perf->sysfd = state.sysfd;
     782           0 :     return perf;
     783             : }
     784             : 
     785             : /* Print the outcome of all tests performed since the last time this function was called */
     786        1105 : void checkasm_report(const char *name, ...)
     787             : {
     788             :     static int prev_checked, prev_failed, max_length;
     789             : 
     790        1105 :     if (state.num_checked > prev_checked) {
     791         201 :         int pad_length = max_length + 4;
     792             :         va_list arg;
     793             : 
     794         201 :         print_cpu_name();
     795         201 :         pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
     796         201 :         va_start(arg, name);
     797         201 :         pad_length -= vfprintf(stderr, name, arg);
     798         201 :         va_end(arg);
     799         201 :         fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
     800             : 
     801         201 :         if (state.num_failed == prev_failed)
     802         201 :             color_printf(COLOR_GREEN, "OK");
     803             :         else
     804           0 :             color_printf(COLOR_RED, "FAILED");
     805         201 :         fprintf(stderr, "]\n");
     806             : 
     807         201 :         prev_checked = state.num_checked;
     808         201 :         prev_failed  = state.num_failed;
     809         904 :     } else if (!state.cpu_flag) {
     810             :         /* Calculate the amount of padding required to make the output vertically aligned */
     811          85 :         int length = strlen(state.current_test_name);
     812             :         va_list arg;
     813             : 
     814          85 :         va_start(arg, name);
     815          85 :         length += vsnprintf(NULL, 0, name, arg);
     816          85 :         va_end(arg);
     817             : 
     818          85 :         if (length > max_length)
     819          53 :             max_length = length;
     820             :     }
     821        1105 : }

Generated by: LCOV version 1.13