LCOV - code coverage report
Current view: top level - libavcodec - snow_dwt.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 435 455 95.6 %
Date: 2017-12-17 16:07:53 Functions: 45 49 91.8 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
       3             :  * Copyright (C) 2008 David Conrad
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "libavutil/attributes.h"
      23             : #include "libavutil/avassert.h"
      24             : #include "libavutil/common.h"
      25             : #include "me_cmp.h"
      26             : #include "snow_dwt.h"
      27             : 
      28         461 : int ff_slice_buffer_init(slice_buffer *buf, int line_count,
      29             :                          int max_allocated_lines, int line_width,
      30             :                          IDWTELEM *base_buffer)
      31             : {
      32             :     int i;
      33             : 
      34         461 :     buf->base_buffer = base_buffer;
      35         461 :     buf->line_count  = line_count;
      36         461 :     buf->line_width  = line_width;
      37         461 :     buf->data_count  = max_allocated_lines;
      38         461 :     buf->line        = av_mallocz_array(line_count, sizeof(IDWTELEM *));
      39         461 :     if (!buf->line)
      40           0 :         return AVERROR(ENOMEM);
      41         461 :     buf->data_stack  = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *));
      42         461 :     if (!buf->data_stack) {
      43           0 :         av_freep(&buf->line);
      44           0 :         return AVERROR(ENOMEM);
      45             :     }
      46             : 
      47       32421 :     for (i = 0; i < max_allocated_lines; i++) {
      48       31960 :         buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM));
      49       31960 :         if (!buf->data_stack[i]) {
      50           0 :             for (i--; i >=0; i--)
      51           0 :                 av_freep(&buf->data_stack[i]);
      52           0 :             av_freep(&buf->data_stack);
      53           0 :             av_freep(&buf->line);
      54           0 :             return AVERROR(ENOMEM);
      55             :         }
      56             :     }
      57             : 
      58         461 :     buf->data_stack_top = max_allocated_lines - 1;
      59         461 :     return 0;
      60             : }
      61             : 
      62      128848 : IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
      63             : {
      64             :     IDWTELEM *buffer;
      65             : 
      66      128848 :     av_assert0(buf->data_stack_top >= 0);
      67             : //  av_assert1(!buf->line[line]);
      68      128848 :     if (buf->line[line])
      69           0 :         return buf->line[line];
      70             : 
      71      128848 :     buffer = buf->data_stack[buf->data_stack_top];
      72      128848 :     buf->data_stack_top--;
      73      128848 :     buf->line[line] = buffer;
      74             : 
      75      128848 :     return buffer;
      76             : }
      77             : 
      78      128848 : void ff_slice_buffer_release(slice_buffer *buf, int line)
      79             : {
      80             :     IDWTELEM *buffer;
      81             : 
      82             :     av_assert1(line >= 0 && line < buf->line_count);
      83             :     av_assert1(buf->line[line]);
      84             : 
      85      128848 :     buffer = buf->line[line];
      86      128848 :     buf->data_stack_top++;
      87      128848 :     buf->data_stack[buf->data_stack_top] = buffer;
      88      128848 :     buf->line[line]                      = NULL;
      89      128848 : }
      90             : 
      91        1864 : void ff_slice_buffer_flush(slice_buffer *buf)
      92             : {
      93             :     int i;
      94             : 
      95        1864 :     if (!buf->line)
      96          20 :         return;
      97             : 
      98      257844 :     for (i = 0; i < buf->line_count; i++)
      99      256000 :         if (buf->line[i])
     100         848 :             ff_slice_buffer_release(buf, i);
     101             : }
     102             : 
     103         481 : void ff_slice_buffer_destroy(slice_buffer *buf)
     104             : {
     105             :     int i;
     106         481 :     ff_slice_buffer_flush(buf);
     107             : 
     108         481 :     if (buf->data_stack)
     109       32421 :         for (i = buf->data_count - 1; i >= 0; i--)
     110       31960 :             av_freep(&buf->data_stack[i]);
     111         481 :     av_freep(&buf->data_stack);
     112         481 :     av_freep(&buf->line);
     113         481 : }
     114             : 
     115    72825120 : static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
     116             :                                   int dst_step, int src_step, int ref_step,
     117             :                                   int width, int mul, int add, int shift,
     118             :                                   int highpass, int inverse)
     119             : {
     120    72825120 :     const int mirror_left  = !highpass;
     121    72825120 :     const int mirror_right = (width & 1) ^ highpass;
     122    72825120 :     const int w            = (width >> 1) - 1 + (highpass & width);
     123             :     int i;
     124             : 
     125             : #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
     126    72825120 :     if (mirror_left) {
     127    24330840 :         dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
     128    24330840 :         dst   += dst_step;
     129    24330840 :         src   += src_step;
     130             :     }
     131             : 
     132   769916940 :     for (i = 0; i < w; i++)
     133   697091820 :         dst[i * dst_step] = LIFT(src[i * src_step],
     134             :                                  ((mul * (ref[i * ref_step] +
     135             :                                           ref[(i + 1) * ref_step]) +
     136             :                                    add) >> shift),
     137             :                                  inverse);
     138             : 
     139    72825120 :     if (mirror_right)
     140    48494280 :         dst[w * dst_step] = LIFT(src[w * src_step],
     141             :                                  ((mul * 2 * ref[w * ref_step] + add) >> shift),
     142             :                                  inverse);
     143    72825120 : }
     144             : 
     145    24163440 : static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
     146             :                                    int dst_step, int src_step, int ref_step,
     147             :                                    int width, int mul, int add, int shift,
     148             :                                    int highpass, int inverse)
     149             : {
     150    24163440 :     const int mirror_left  = !highpass;
     151    24163440 :     const int mirror_right = (width & 1) ^ highpass;
     152    24163440 :     const int w            = (width >> 1) - 1 + (highpass & width);
     153             :     int i;
     154             : 
     155             :     av_assert1(shift == 4);
     156             : #define LIFTS(src, ref, inv)                                            \
     157             :     ((inv) ? (src) + (((ref) + 4 * (src)) >> shift)                     \
     158             :            : -((-16 * (src) + (ref) + add /                             \
     159             :                 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
     160    24163440 :     if (mirror_left) {
     161    24163440 :         dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
     162    24163440 :         dst   += dst_step;
     163    24163440 :         src   += src_step;
     164             :     }
     165             : 
     166   246511280 :     for (i = 0; i < w; i++)
     167   222347840 :         dst[i * dst_step] = LIFTS(src[i * src_step],
     168             :                                   mul * (ref[i * ref_step] +
     169             :                                          ref[(i + 1) * ref_step]) + add,
     170             :                                   inverse);
     171             : 
     172    24163440 :     if (mirror_right)
     173           0 :         dst[w * dst_step] = LIFTS(src[w * src_step],
     174             :                                   mul * 2 * ref[w * ref_step] + add,
     175             :                                   inverse);
     176    24163440 : }
     177             : 
     178      167400 : static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
     179             : {
     180      167400 :     const int width2 = width >> 1;
     181             :     int x;
     182      167400 :     const int w2 = (width + 1) >> 1;
     183             : 
     184    15357600 :     for (x = 0; x < width2; x++) {
     185    15190200 :         temp[x]      = b[2 * x];
     186    15190200 :         temp[x + w2] = b[2 * x + 1];
     187             :     }
     188      167400 :     if (width & 1)
     189        2700 :         temp[x] = b[2 * x];
     190      167400 :     lift(b + w2, temp + w2, temp,   1, 1, 1, width, -1, 0, 1, 1, 0);
     191      167400 :     lift(b,      temp,      b + w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
     192      167400 : }
     193             : 
     194       83550 : static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     195             :                                     int width)
     196             : {
     197             :     int i;
     198             : 
     199    15273450 :     for (i = 0; i < width; i++)
     200    15189900 :         b1[i] -= (b0[i] + b2[i]) >> 1;
     201       83550 : }
     202             : 
     203       83850 : static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     204             :                                     int width)
     205             : {
     206             :     int i;
     207             : 
     208    15277050 :     for (i = 0; i < width; i++)
     209    15193200 :         b1[i] += (b0[i] + b2[i] + 2) >> 2;
     210       83850 : }
     211             : 
     212        2250 : static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
     213             :                                  int width, int height, int stride)
     214             : {
     215             :     int y;
     216        2250 :     DWTELEM *b0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride;
     217        2250 :     DWTELEM *b1 = buffer + avpriv_mirror(-2,     height - 1) * stride;
     218             : 
     219       88350 :     for (y = -2; y < height; y += 2) {
     220       86100 :         DWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
     221       86100 :         DWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
     222             : 
     223       86100 :         if (y + 1 < (unsigned)height)
     224       83550 :             horizontal_decompose53i(b2, temp, width);
     225       86100 :         if (y + 2 < (unsigned)height)
     226       83850 :             horizontal_decompose53i(b3, temp, width);
     227             : 
     228       86100 :         if (y + 1 < (unsigned)height)
     229       83550 :             vertical_decompose53iH0(b1, b2, b3, width);
     230       86100 :         if (y + 0 < (unsigned)height)
     231       83850 :             vertical_decompose53iL0(b0, b1, b2, width);
     232             : 
     233       86100 :         b0 = b2;
     234       86100 :         b1 = b3;
     235             :     }
     236        2250 : }
     237             : 
     238    24163440 : static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
     239             : {
     240    24163440 :     const int w2 = (width + 1) >> 1;
     241             : 
     242    24163440 :     lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
     243    24163440 :     liftS(temp,     b,     temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
     244    24163440 :     lift(b + w2, temp + w2, temp,     1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
     245    24163440 :     lift(b,      temp,      b + w2,   1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
     246    24163440 : }
     247             : 
     248    12081720 : static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     249             :                                     int width)
     250             : {
     251             :     int i;
     252             : 
     253   258593000 :     for (i = 0; i < width; i++)
     254   246511280 :         b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
     255    12081720 : }
     256             : 
     257    12081720 : static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     258             :                                     int width)
     259             : {
     260             :     int i;
     261             : 
     262   258593000 :     for (i = 0; i < width; i++)
     263   246511280 :         b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
     264    12081720 : }
     265             : 
     266    12081720 : static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     267             :                                     int width)
     268             : {
     269             :     int i;
     270             : 
     271   258593000 :     for (i = 0; i < width; i++)
     272   493022560 :         b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) /
     273   246511280 :                 (5 * 16) - (1 << 23);
     274    12081720 : }
     275             : 
     276    12081720 : static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     277             :                                     int width)
     278             : {
     279             :     int i;
     280             : 
     281   258593000 :     for (i = 0; i < width; i++)
     282   246511280 :         b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
     283    12081720 : }
     284             : 
     285     1951060 : static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
     286             :                                  int width, int height, int stride)
     287             : {
     288             :     int y;
     289     1951060 :     DWTELEM *b0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride;
     290     1951060 :     DWTELEM *b1 = buffer + avpriv_mirror(-4,     height - 1) * stride;
     291     1951060 :     DWTELEM *b2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride;
     292     1951060 :     DWTELEM *b3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride;
     293             : 
     294    17934900 :     for (y = -4; y < height; y += 2) {
     295    15983840 :         DWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
     296    15983840 :         DWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
     297             : 
     298    15983840 :         if (y + 3 < (unsigned)height)
     299    12081720 :             horizontal_decompose97i(b4, temp, width);
     300    15983840 :         if (y + 4 < (unsigned)height)
     301    12081720 :             horizontal_decompose97i(b5, temp, width);
     302             : 
     303    15983840 :         if (y + 3 < (unsigned)height)
     304    12081720 :             vertical_decompose97iH0(b3, b4, b5, width);
     305    15983840 :         if (y + 2 < (unsigned)height)
     306    12081720 :             vertical_decompose97iL0(b2, b3, b4, width);
     307    15983840 :         if (y + 1 < (unsigned)height)
     308    12081720 :             vertical_decompose97iH1(b1, b2, b3, width);
     309    15983840 :         if (y + 0 < (unsigned)height)
     310    12081720 :             vertical_decompose97iL1(b0, b1, b2, width);
     311             : 
     312    15983840 :         b0 = b2;
     313    15983840 :         b1 = b3;
     314    15983840 :         b2 = b4;
     315    15983840 :         b3 = b5;
     316             :     }
     317     1951060 : }
     318             : 
     319      487990 : void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
     320             :                     int stride, int type, int decomposition_count)
     321             : {
     322             :     int level;
     323             : 
     324     2441300 :     for (level = 0; level < decomposition_count; level++) {
     325     1953310 :         switch (type) {
     326     1951060 :         case DWT_97:
     327     1951060 :             spatial_decompose97i(buffer, temp,
     328             :                                  width >> level, height >> level,
     329             :                                  stride << level);
     330     1951060 :             break;
     331        2250 :         case DWT_53:
     332        2250 :             spatial_decompose53i(buffer, temp,
     333             :                                  width >> level, height >> level,
     334             :                                  stride << level);
     335        2250 :             break;
     336             :         }
     337             :     }
     338      487990 : }
     339             : 
     340      392832 : static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
     341             : {
     342      392832 :     const int width2 = width >> 1;
     343      392832 :     const int w2     = (width + 1) >> 1;
     344             :     int x;
     345             : 
     346    36039168 :     for (x = 0; x < width2; x++) {
     347    35646336 :         temp[2 * x]     = b[x];
     348    35646336 :         temp[2 * x + 1] = b[x + w2];
     349             :     }
     350      392832 :     if (width & 1)
     351        6336 :         temp[2 * x] = b[x];
     352             : 
     353      392832 :     b[0] = temp[0] - ((temp[1] + 1) >> 1);
     354    35646336 :     for (x = 2; x < width - 1; x += 2) {
     355    35253504 :         b[x]     = temp[x]     - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
     356    35253504 :         b[x - 1] = temp[x - 1] + ((b[x - 2]    + b[x]        + 1) >> 1);
     357             :     }
     358      392832 :     if (width & 1) {
     359        6336 :         b[x]     = temp[x]     - ((temp[x - 1]     + 1) >> 1);
     360        6336 :         b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
     361             :     } else
     362      386496 :         b[x - 1] = temp[x - 1] + b[x - 2];
     363      392832 : }
     364             : 
     365      112288 : static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     366             :                                   int width)
     367             : {
     368             :     int i;
     369             : 
     370    20369624 :     for (i = 0; i < width; i++)
     371    20257336 :         b1[i] += (b0[i] + b2[i]) >> 1;
     372      112288 : }
     373             : 
     374      112992 : static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     375             :                                   int width)
     376             : {
     377             :     int i;
     378             : 
     379    20378072 :     for (i = 0; i < width; i++)
     380    20265080 :         b1[i] -= (b0[i] + b2[i] + 2) >> 2;
     381      112992 : }
     382             : 
     383        2310 : static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
     384             :                                              int height, int stride_line)
     385             : {
     386        2310 :     cs->b0 = slice_buffer_get_line(sb,
     387             :                                    avpriv_mirror(-1 - 1, height - 1) * stride_line);
     388        2310 :     cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-1, height - 1) * stride_line);
     389        2310 :     cs->y  = -1;
     390        2310 : }
     391             : 
     392        2970 : static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
     393             :                                     int height, int stride)
     394             : {
     395        2970 :     cs->b0 = buffer + avpriv_mirror(-1 - 1, height - 1) * stride;
     396        2970 :     cs->b1 = buffer + avpriv_mirror(-1,     height - 1) * stride;
     397        2970 :     cs->y  = -1;
     398        2970 : }
     399             : 
     400       88396 : static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
     401             :                                            IDWTELEM *temp,
     402             :                                            int width, int height,
     403             :                                            int stride_line)
     404             : {
     405       88396 :     int y = cs->y;
     406             : 
     407       88396 :     IDWTELEM *b0 = cs->b0;
     408       88396 :     IDWTELEM *b1 = cs->b1;
     409       88396 :     IDWTELEM *b2 = slice_buffer_get_line(sb,
     410             :                                          avpriv_mirror(y + 1, height - 1) *
     411             :                                          stride_line);
     412       88396 :     IDWTELEM *b3 = slice_buffer_get_line(sb,
     413             :                                          avpriv_mirror(y + 2, height - 1) *
     414             :                                          stride_line);
     415             : 
     416      172172 :     if (y + 1 < (unsigned)height && y < (unsigned)height) {
     417             :         int x;
     418             : 
     419    15472072 :         for (x = 0; x < width; x++) {
     420    15388296 :             b2[x] -= (b1[x] + b3[x] + 2) >> 2;
     421    15388296 :             b1[x] += (b0[x] + b2[x])     >> 1;
     422             :         }
     423             :     } else {
     424        4620 :         if (y + 1 < (unsigned)height)
     425        2310 :             vertical_compose53iL0(b1, b2, b3, width);
     426        4620 :         if (y + 0 < (unsigned)height)
     427        2002 :             vertical_compose53iH0(b0, b1, b2, width);
     428             :     }
     429             : 
     430       88396 :     if (y - 1 < (unsigned)height)
     431       86086 :         horizontal_compose53i(b0, temp, width);
     432       88396 :     if (y + 0 < (unsigned)height)
     433       85778 :         horizontal_compose53i(b1, temp, width);
     434             : 
     435       88396 :     cs->b0  = b2;
     436       88396 :     cs->b1  = b3;
     437       88396 :     cs->y  += 2;
     438       88396 : }
     439             : 
     440      113652 : static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
     441             :                                   IDWTELEM *temp, int width, int height,
     442             :                                   int stride)
     443             : {
     444      113652 :     int y        = cs->y;
     445      113652 :     IDWTELEM *b0 = cs->b0;
     446      113652 :     IDWTELEM *b1 = cs->b1;
     447      113652 :     IDWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
     448      113652 :     IDWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
     449             : 
     450      113652 :     if (y + 1 < (unsigned)height)
     451      110682 :         vertical_compose53iL0(b1, b2, b3, width);
     452      113652 :     if (y + 0 < (unsigned)height)
     453      110286 :         vertical_compose53iH0(b0, b1, b2, width);
     454             : 
     455      113652 :     if (y - 1 < (unsigned)height)
     456      110682 :         horizontal_compose53i(b0, temp, width);
     457      113652 :     if (y + 0 < (unsigned)height)
     458      110286 :         horizontal_compose53i(b1, temp, width);
     459             : 
     460      113652 :     cs->b0  = b2;
     461      113652 :     cs->b1  = b3;
     462      113652 :     cs->y  += 2;
     463      113652 : }
     464             : 
     465      245520 : void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
     466             : {
     467      245520 :     const int w2 = (width + 1) >> 1;
     468             :     int x;
     469             : 
     470      245520 :     temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
     471    19001232 :     for (x = 1; x < (width >> 1); x++) {
     472    18755712 :         temp[2 * x]     = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
     473    18755712 :         temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
     474             :     }
     475      245520 :     if (width & 1) {
     476        3456 :         temp[2 * x]     = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
     477        3456 :         temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
     478             :     } else
     479      242064 :         temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
     480             : 
     481      245520 :     b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
     482    19001232 :     for (x = 2; x < width - 1; x += 2) {
     483    18755712 :         b[x]     = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
     484    18755712 :         b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
     485             :     }
     486      245520 :     if (width & 1) {
     487        3456 :         b[x]     = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
     488        3456 :         b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
     489             :     } else
     490      242064 :         b[x - 1] = temp[x - 1] + 3 * b[x - 2];
     491      245520 : }
     492             : 
     493       93412 : static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     494             :                                   int width)
     495             : {
     496             :     int i;
     497             : 
     498    16894156 :     for (i = 0; i < width; i++)
     499    16800744 :         b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
     500       93412 : }
     501             : 
     502       93412 : static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     503             :                                   int width)
     504             : {
     505             :     int i;
     506             : 
     507    16894156 :     for (i = 0; i < width; i++)
     508    16800744 :         b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
     509       93412 : }
     510             : 
     511       93412 : static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     512             :                                   int width)
     513             : {
     514             :     int i;
     515             : 
     516    16894156 :     for (i = 0; i < width; i++)
     517    16800744 :         b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS;
     518       93412 : }
     519             : 
     520       93412 : static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     521             :                                   int width)
     522             : {
     523             :     int i;
     524             : 
     525    16894156 :     for (i = 0; i < width; i++)
     526    16800744 :         b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
     527       93412 : }
     528             : 
     529       29376 : void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     530             :                                  IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
     531             :                                  int width)
     532             : {
     533             :     int i;
     534             : 
     535     2232576 :     for (i = 0; i < width; i++) {
     536     2203200 :         b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS;
     537     2203200 :         b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS;
     538     2203200 :         b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS;
     539     2203200 :         b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
     540             :     }
     541       29376 : }
     542             : 
     543        4605 : static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
     544             :                                              int height, int stride_line)
     545             : {
     546        4605 :     cs->b0 = slice_buffer_get_line(sb, avpriv_mirror(-3 - 1, height - 1) * stride_line);
     547        4605 :     cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-3,     height - 1) * stride_line);
     548        4605 :     cs->b2 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 1, height - 1) * stride_line);
     549        4605 :     cs->b3 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 2, height - 1) * stride_line);
     550        4605 :     cs->y  = -3;
     551        4605 : }
     552             : 
     553        6420 : static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
     554             :                                     int stride)
     555             : {
     556        6420 :     cs->b0 = buffer + avpriv_mirror(-3 - 1, height - 1) * stride;
     557        6420 :     cs->b1 = buffer + avpriv_mirror(-3,     height - 1) * stride;
     558        6420 :     cs->b2 = buffer + avpriv_mirror(-3 + 1, height - 1) * stride;
     559        6420 :     cs->b3 = buffer + avpriv_mirror(-3 + 2, height - 1) * stride;
     560        6420 :     cs->y  = -3;
     561        6420 : }
     562             : 
     563       47278 : static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs,
     564             :                                            slice_buffer * sb, IDWTELEM *temp,
     565             :                                            int width, int height,
     566             :                                            int stride_line)
     567             : {
                           /* Advance the 9/7 inverse transform by one step (two rows) on a
                            * slice-buffer backed plane.
                            *
                            * cs holds the four lines carried over from the previous step
                            * (b0..b3) and the current row position y. Two further lines, for
                            * rows y + 3 and y + 4, are fetched here. On exit the window is
                            * shifted by two lines and cs->y is incremented by 2. */
     568       47278 :     int y = cs->y;
     569             : 
     570       47278 :     IDWTELEM *b0 = cs->b0;
     571       47278 :     IDWTELEM *b1 = cs->b1;
     572       47278 :     IDWTELEM *b2 = cs->b2;
     573       47278 :     IDWTELEM *b3 = cs->b3;
     574       47278 :     IDWTELEM *b4 = slice_buffer_get_line(sb,
     575             :                                          avpriv_mirror(y + 3, height - 1) *
     576             :                                          stride_line);
     577       47278 :     IDWTELEM *b5 = slice_buffer_get_line(sb,
     578             :                                          avpriv_mirror(y + 4, height - 1) *
     579             :                                          stride_line);
     580             : 
                           /* Interior rows: all four vertical steps are handed to the
                            * (possibly optimised) dsp callback in a single call. */
     581       47278 :     if (y > 0 && y + 4 < height) {
     582       29472 :         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
     583             :     } else {
                               /* Border rows: run each vertical step separately. Comparing
                                * against (unsigned)height converts the left side to unsigned,
                                * so a negative row index fails the test as well; each check
                                * is effectively 0 <= row < height. */
     584       17806 :         if (y + 3 < (unsigned)height)
     585        8596 :             vertical_compose97iL1(b3, b4, b5, width);
     586       17806 :         if (y + 2 < (unsigned)height)
     587        8596 :             vertical_compose97iH1(b2, b3, b4, width);
     588       17806 :         if (y + 1 < (unsigned)height)
     589        8596 :             vertical_compose97iL0(b1, b2, b3, width);
     590       17806 :         if (y + 0 < (unsigned)height)
     591        8596 :             vertical_compose97iH0(b0, b1, b2, width);
     592             :     }
     593             : 
                           /* Horizontal pass on the two lines for rows y - 1 and y, using the
                            * same in-range test as above. */
     594       47278 :     if (y - 1 < (unsigned)height)
     595       38068 :         dsp->horizontal_compose97i(b0, temp, width);
     596       47278 :     if (y + 0 < (unsigned)height)
     597       38068 :         dsp->horizontal_compose97i(b1, temp, width);
     598             : 
                           /* Slide the four-line window down by two rows for the next call. */
     599       47278 :     cs->b0  = b2;
     600       47278 :     cs->b1  = b3;
     601       47278 :     cs->b2  = b4;
     602       47278 :     cs->b3  = b5;
     603       47278 :     cs->y  += 2;
     604       47278 : }
     605             : 
     606       97656 : static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
     607             :                                   IDWTELEM *temp, int width, int height,
     608             :                                   int stride)
     609             : {
                           /* Advance the 9/7 inverse transform by one step (two rows) on a
                            * plain contiguous buffer. Lines b0..b3 and the row position y come
                            * from cs; b4 and b5 are the rows y + 3 and y + 4 (mapped through
                            * avpriv_mirror). On exit the window is shifted by two lines and
                            * cs->y is incremented by 2. */
     610       97656 :     int y        = cs->y;
     611       97656 :     IDWTELEM *b0 = cs->b0;
     612       97656 :     IDWTELEM *b1 = cs->b1;
     613       97656 :     IDWTELEM *b2 = cs->b2;
     614       97656 :     IDWTELEM *b3 = cs->b3;
     615       97656 :     IDWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
     616       97656 :     IDWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
     617             : 
                           /* Each vertical step runs only when its row is inside the plane.
                            * The comparison against (unsigned)height converts the left side to
                            * unsigned, so negative rows are rejected too. */
     618       97656 :     if (y + 3 < (unsigned)height)
     619       84816 :         vertical_compose97iL1(b3, b4, b5, width);
     620       97656 :     if (y + 2 < (unsigned)height)
     621       84816 :         vertical_compose97iH1(b2, b3, b4, width);
     622       97656 :     if (y + 1 < (unsigned)height)
     623       84816 :         vertical_compose97iL0(b1, b2, b3, width);
     624       97656 :     if (y + 0 < (unsigned)height)
     625       84816 :         vertical_compose97iH0(b0, b1, b2, width);
     626             : 
                           /* Horizontal pass on the lines for rows y - 1 and y. */
     627       97656 :     if (y - 1 < (unsigned)height)
     628       84816 :         ff_snow_horizontal_compose97i(b0, temp, width);
     629       97656 :     if (y + 0 < (unsigned)height)
     630       84816 :         ff_snow_horizontal_compose97i(b1, temp, width);
     631             : 
                           /* Slide the four-line window down by two rows for the next call. */
     632       97656 :     cs->b0  = b2;
     633       97656 :     cs->b1  = b3;
     634       97656 :     cs->b2  = b4;
     635       97656 :     cs->b3  = b5;
     636       97656 :     cs->y  += 2;
     637       97656 : }
     638             : 
     639        1383 : void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
     640             :                                    int height, int stride_line, int type,
     641             :                                    int decomposition_count)
     642             : {
                           /* Initialise one DWTCompose entry per decomposition level for the
                            * slice-buffer based inverse DWT. Level `level` is set up with
                            * height >> level rows and a line step of stride_line << level.
                            * cs must provide at least decomposition_count entries.
                            * `width` is not used by this function. The switch has no default:
                            * for a type other than DWT_97 / DWT_53 the entries are left
                            * untouched. */
     643             :     int level;
     644        8298 :     for (level = decomposition_count - 1; level >= 0; level--) {
     645        6915 :         switch (type) {
     646        4605 :         case DWT_97:
     647        4605 :             spatial_compose97i_buffered_init(cs + level, sb, height >> level,
     648             :                                              stride_line << level);
     649        4605 :             break;
     650        2310 :         case DWT_53:
     651        2310 :             spatial_compose53i_buffered_init(cs + level, sb, height >> level,
     652             :                                              stride_line << level);
     653        2310 :             break;
     654             :         }
     655             :     }
     656        1383 : }
     657             : 
     658       33857 : void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
     659             :                                     slice_buffer *slice_buf, IDWTELEM *temp,
     660             :                                     int width, int height, int stride_line,
     661             :                                     int type, int decomposition_count, int y)
     662             : {
                           /* Run the slice-buffer based inverse DWT far enough to cover row y.
                            * Levels are processed from the highest index down to 0; each
                            * level's compose step is repeated while its row position
                            * cs[level].y is <= min((y >> level) + support, height >> level).
                            *
                            * support is 3 for type 1 and 5 otherwise; type 2 is a no-op.
                            * NOTE(review): the literals 1 and 2 are compared directly instead
                            * of named constants; 1 presumably equals DWT_53 -- confirm against
                            * the header. */
     663       33857 :     const int support = type == 1 ? 3 : 5;
     664             :     int level;
     665       33857 :     if (type == 2)
     666           0 :         return;
     667             : 
                           /* NOTE(review): the switch has no default. For a type that is
                            * neither DWT_97 nor DWT_53 nothing in the loop body changes
                            * cs[level].y, so the while condition would never become false. */
     668      203142 :     for (level = decomposition_count - 1; level >= 0; level--)
     669      474244 :         while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
     670      135674 :             switch (type) {
     671       47278 :             case DWT_97:
     672       47278 :                 spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
     673             :                                                width >> level,
     674             :                                                height >> level,
     675             :                                                stride_line << level);
     676       47278 :                 break;
     677       88396 :             case DWT_53:
     678       88396 :                 spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
     679             :                                                width >> level,
     680             :                                                height >> level,
     681             :                                                stride_line << level);
     682       88396 :                 break;
     683             :             }
     684             :         }
     685             : }
     686             : 
     687        1878 : static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
     688             :                                  int height, int stride, int type,
     689             :                                  int decomposition_count)
     690             : {
                           /* Initialise one DWTCompose entry per decomposition level for the
                            * plain-buffer inverse DWT. Level `level` uses height >> level rows
                            * and a row stride of stride << level. `width` is not used here.
                            * The switch has no default: a type other than DWT_97 / DWT_53
                            * leaves the entries untouched. */
     691             :     int level;
     692       11268 :     for (level = decomposition_count - 1; level >= 0; level--) {
     693        9390 :         switch (type) {
     694        6420 :         case DWT_97:
     695        6420 :             spatial_compose97i_init(cs + level, buffer, height >> level,
     696             :                                     stride << level);
     697        6420 :             break;
     698        2970 :         case DWT_53:
     699        2970 :             spatial_compose53i_init(cs + level, buffer, height >> level,
     700             :                                     stride << level);
     701        2970 :             break;
     702             :         }
     703             :     }
     704        1878 : }
     705             : 
     706       50400 : static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
     707             :                                   IDWTELEM *temp, int width, int height,
     708             :                                   int stride, int type,
     709             :                                   int decomposition_count, int y)
     710             : {
                           /* Run the plain-buffer inverse DWT far enough to cover row y.
                            * Levels go from the highest index down to 0; each level's compose
                            * step repeats while cs[level].y is
                            * <= min((y >> level) + support, height >> level).
                            * support is 3 for type 1 and 5 otherwise; type 2 returns at once. */
     711       50400 :     const int support = type == 1 ? 3 : 5;
     712             :     int level;
     713       50400 :     if (type == 2)
     714           0 :         return;
     715             : 
                           /* NOTE(review): no default case -- for a type other than DWT_97 /
                            * DWT_53 the loop body never advances cs[level].y, so the while
                            * condition cannot change. */
     716      302400 :     for (level = decomposition_count - 1; level >= 0; level--)
     717      715308 :         while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
     718      211308 :             switch (type) {
     719       97656 :             case DWT_97:
     720       97656 :                 spatial_compose97i_dy(cs + level, buffer, temp, width >> level,
     721             :                                       height >> level, stride << level);
     722       97656 :                 break;
     723      113652 :             case DWT_53:
     724      113652 :                 spatial_compose53i_dy(cs + level, buffer, temp, width >> level,
     725             :                                       height >> level, stride << level);
     726      113652 :                 break;
     727             :             }
     728             :         }
     729             : }
     730             : 
     731        1878 : void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
     732             :                      int stride, int type, int decomposition_count)
     733             : {
                           /* Inverse DWT of a whole plane held in `buffer`: set up the
                            * per-level compose state on the stack, then drive the slice
                            * routine for y = 0, 4, 8, ... while y < height.
                            * NOTE(review): cs has MAX_DECOMPOSITIONS entries and is indexed up
                            * to decomposition_count - 1; that bound is not checked here. */
     734             :     DWTCompose cs[MAX_DECOMPOSITIONS];
     735             :     int y;
     736        1878 :     spatial_idwt_init(cs, buffer, width, height, stride, type,
     737             :                          decomposition_count);
     738       52278 :     for (y = 0; y < height; y += 4)
     739       50400 :         spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
     740             :                               decomposition_count, y);
     741        1878 : }
     742             : 
     743      486640 : static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size,
     744             :                       int w, int h, int type)
     745             : {
                           /* Wavelet-domain comparison of two pixel blocks.
                            *
                            * The difference pix1 - pix2, scaled up by 4 bits, is written to a
                            * 32-element-stride scratch block, transformed with
                            * ff_spatial_dwt(), and the absolute values of the coefficients --
                            * each multiplied by a per-subband weight from scale[] -- are
                            * summed. The sum shifted right by 9 is returned.
                            *
                            * v         is not used.
                            * line_size is the step between rows of pix1 and pix2.
                            * w, h      block size; av_assert1 expects w == h. tmp holds 32 * 32
                            *           elements, so neither may exceed 32 (not checked here).
                            * type      first index into scale[]: 0 selects the 9/7 weights,
                            *           1 the 5/3 weights; also passed on to ff_spatial_dwt(). */
     746             :     int s, i, j;
                           /* Three decomposition levels for 8-wide blocks, four otherwise. */
     747      486640 :     const int dec_count = w == 8 ? 3 : 4;
     748             :     int tmp[32 * 32], tmp2[32];
     749             :     int level, ori;
                           /* Weights indexed as [type][dec_count - 3][level][ori]. */
     750             :     static const int scale[2][2][4][4] = {
     751             :         {
     752             :             { // 9/7 8x8 dec=3
     753             :                 { 268, 239, 239, 213 },
     754             :                 { 0,   224, 224, 152 },
     755             :                 { 0,   135, 135, 110 },
     756             :             },
     757             :             { // 9/7 16x16 or 32x32 dec=4
     758             :                 { 344, 310, 310, 280 },
     759             :                 { 0,   320, 320, 228 },
     760             :                 { 0,   175, 175, 136 },
     761             :                 { 0,   129, 129, 102 },
     762             :             }
     763             :         },
     764             :         {
     765             :             { // 5/3 8x8 dec=3
     766             :                 { 275, 245, 245, 218 },
     767             :                 { 0,   230, 230, 156 },
     768             :                 { 0,   138, 138, 113 },
     769             :             },
     770             :             { // 5/3 16x16 or 32x32 dec=4
     771             :                 { 352, 317, 317, 286 },
     772             :                 { 0,   328, 328, 233 },
     773             :                 { 0,   180, 180, 140 },
     774             :                 { 0,   132, 132, 105 },
     775             :             }
     776             :         }
     777             :     };
     778             : 
                           /* Fill tmp with the scaled pixel differences, four columns per
                            * inner iteration (so columns are touched up to the next multiple
                            * of 4).
                            * NOTE(review): pix1[j] - pix2[j] can be negative, and left-shifting
                            * a negative int is undefined behaviour in ISO C; multiplying by
                            * (1 << 4) would express the same scaling without it. */
     779    13334128 :     for (i = 0; i < h; i++) {
     780   104727424 :         for (j = 0; j < w; j += 4) {
     781    91879936 :             tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4;
     782    91879936 :             tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4;
     783    91879936 :             tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4;
     784    91879936 :             tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4;
     785             :         }
     786    12847488 :         pix1 += line_size;
     787    12847488 :         pix2 += line_size;
     788             :     }
     789             : 
                           /* Transform in place; 32 is the row stride of tmp, tmp2 is scratch. */
     790      486640 :     ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
     791             : 
     792      486640 :     s = 0;
     793             :     av_assert1(w == h);
                           /* Walk the subbands: level 0 visits ori 0..3, later levels ori 1..3.
                            * Bit 0 of ori adds a horizontal offset of `size`; bit 1 adds a
                            * vertical offset of half the level's row stride. */
     794     2433200 :     for (level = 0; level < dec_count; level++)
     795     8272880 :         for (ori = level ? 1 : 0; ori < 4; ori++) {
     796     6326320 :             int size   = w >> (dec_count - level);
     797     6326320 :             int sx     = (ori & 1) ? size : 0;
     798     6326320 :             int stride = 32 << (dec_count - level);
     799     6326320 :             int sy     = (ori & 2) ? stride >> 1 : 0;
     800             : 
     801    43262848 :             for (i = 0; i < size; i++)
     802   404456272 :                 for (j = 0; j < size; j++) {
     803   735039488 :                     int v = tmp[sx + sy + i * stride + j] *
     804   367519744 :                             scale[type][dec_count - 3][level][ori];
     805   367519744 :                     s += FFABS(v);
     806             :                 }
     807             :         }
     808             :     av_assert1(s >= 0);
     809      486640 :     return s >> 9;
     810             : }
     811             : 
     812           0 : static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     813             : {
                           /* w_c() with a block width of 8 and type 1 (5/3 weights). */
     814           0 :     return w_c(v, pix1, pix2, line_size, 8, h, 1);
     815             : }
     816             : 
     817           0 : static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     818             : {
                           /* w_c() with a block width of 8 and type 0 (9/7 weights). */
     819           0 :     return w_c(v, pix1, pix2, line_size, 8, h, 0);
     820             : }
     821             : 
     822           0 : static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     823             : {
                           /* w_c() with a block width of 16 and type 1 (5/3 weights). */
     824           0 :     return w_c(v, pix1, pix2, line_size, 16, h, 1);
     825             : }
     826             : 
     827      170312 : static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     828             : {
                           /* w_c() with a block width of 16 and type 0 (9/7 weights). */
     829      170312 :     return w_c(v, pix1, pix2, line_size, 16, h, 0);
     830             : }
     831             : 
     832           0 : int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     833             : {
                           /* w_c() with a block width of 32 and type 1 (5/3 weights). */
     834           0 :     return w_c(v, pix1, pix2, line_size, 32, h, 1);
     835             : }
     836             : 
     837      316328 : int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
     838             : {
                           /* w_c() with a block width of 32 and type 0 (9/7 weights). */
     839      316328 :     return w_c(v, pix1, pix2, line_size, 32, h, 0);
     840             : }
     841             : 
     842         938 : av_cold void ff_dsputil_init_dwt(MECmpContext *c)
     843             : {
                           /* Install the C wavelet comparison functions in c: slot 0 of each
                            * table receives the 16-wide variant, slot 1 the 8-wide variant. */
     844         938 :     c->w53[0] = w53_16_c;
     845         938 :     c->w53[1] = w53_8_c;
     846         938 :     c->w97[0] = w97_16_c;
     847         938 :     c->w97[1] = w97_8_c;
     848         938 : }
     849             : 
     850          31 : av_cold void ff_dwt_init(SnowDWTContext *c)
     851             : {
                           /* Point the context's function pointers at the C implementations,
                            * then call the x86 init hook when HAVE_MMX is non-zero.
                            * NOTE(review): ff_dwt_init_x86 presumably replaces some of these
                            * pointers with optimised versions -- confirm in the x86 file. */
     852          31 :     c->vertical_compose97i   = ff_snow_vertical_compose97i;
     853          31 :     c->horizontal_compose97i = ff_snow_horizontal_compose97i;
     854          31 :     c->inner_add_yblock      = ff_snow_inner_add_yblock;
     855             : 
     856             :     if (HAVE_MMX)
     857          31 :         ff_dwt_init_x86(c);
     858          31 : }
     859             : 
     860             : 

Generated by: LCOV version 1.13