LCOV - code coverage report
Current view: top level - libavcodec - snow.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 379 443 85.6 %
Date: 2017-12-11 04:34:20 Functions: 15 21 71.4 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : #include "libavutil/intmath.h"
      22             : #include "libavutil/log.h"
      23             : #include "libavutil/opt.h"
      24             : #include "avcodec.h"
      25             : #include "me_cmp.h"
      26             : #include "snow_dwt.h"
      27             : #include "internal.h"
      28             : #include "snow.h"
      29             : #include "snowdata.h"
      30             : 
      31             : #include "rangecoder.h"
      32             : #include "mathops.h"
      33             : #include "h263.h"
      34             : 
      35             : 
      36      710775 : void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
      37             :                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
      38             :     int y, x;
      39             :     IDWTELEM * dst;
      40     4521015 :     for(y=0; y<b_h; y++){
      41             :         //FIXME ugly misuse of obmc_stride
      42     3810240 :         const uint8_t *obmc1= obmc + y*obmc_stride;
      43     3810240 :         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
      44     3810240 :         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
      45     3810240 :         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
      46     3810240 :         dst = slice_buffer_get_line(sb, src_y + y);
      47    27656640 :         for(x=0; x<b_w; x++){
      48    47692800 :             int v=   obmc1[x] * block[3][x + y*src_stride]
      49    23846400 :                     +obmc2[x] * block[2][x + y*src_stride]
      50    23846400 :                     +obmc3[x] * block[1][x + y*src_stride]
      51    23846400 :                     +obmc4[x] * block[0][x + y*src_stride];
      52             : 
      53    23846400 :             v <<= 8 - LOG2_OBMC_MAX;
      54             :             if(FRAC_BITS != 8){
      55    23846400 :                 v >>= 8 - FRAC_BITS;
      56             :             }
      57    23846400 :             if(add){
      58    23846400 :                 v += dst[x + src_x];
      59    23846400 :                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
      60    23846400 :                 if(v&(~255)) v= ~(v>>31);
      61    23846400 :                 dst8[x + y*src_stride] = v;
      62             :             }else{
      63           0 :                 dst[x + src_x] -= v;
      64             :             }
      65             :         }
      66             :     }
      67      710775 : }
      68             : 
      69         982 : int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
      70             : {
      71             :     int ret, i;
      72         982 :     int edges_needed = av_codec_is_encoder(s->avctx->codec);
      73             : 
      74         982 :     frame->width  = s->avctx->width ;
      75         982 :     frame->height = s->avctx->height;
      76         982 :     if (edges_needed) {
      77         521 :         frame->width  += 2 * EDGE_WIDTH;
      78         521 :         frame->height += 2 * EDGE_WIDTH;
      79             :     }
      80         982 :     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
      81           0 :         return ret;
      82         982 :     if (edges_needed) {
      83        2084 :         for (i = 0; frame->data[i]; i++) {
      84        4689 :             int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
      85        1563 :                             frame->linesize[i] +
      86        1563 :                             (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0));
      87        1563 :             frame->data[i] += offset;
      88             :         }
      89         521 :         frame->width  = s->avctx->width;
      90         521 :         frame->height = s->avctx->height;
      91             :     }
      92             : 
      93         982 :     return 0;
      94             : }
      95             : 
      96         103 : void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
      97             :     int plane_index, level, orientation;
      98             : 
      99         412 :     for(plane_index=0; plane_index<3; plane_index++){
     100        2781 :         for(level=0; level<MAX_DECOMPOSITIONS; level++){
     101       10197 :             for(orientation=level ? 1:0; orientation<4; orientation++){
     102        7725 :                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
     103             :             }
     104             :         }
     105             :     }
     106         103 :     memset(s->header_state, MID_STATE, sizeof(s->header_state));
     107         103 :     memset(s->block_state, MID_STATE, sizeof(s->block_state));
     108         103 : }
     109             : 
     110         472 : int ff_snow_alloc_blocks(SnowContext *s){
     111         472 :     int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
     112         472 :     int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
     113             : 
     114         472 :     s->b_width = w;
     115         472 :     s->b_height= h;
     116             : 
     117         472 :     av_free(s->block);
     118         472 :     s->block= av_mallocz_array(w * h,  sizeof(BlockNode) << (s->block_max_depth*2));
     119         472 :     if (!s->block)
     120           0 :         return AVERROR(ENOMEM);
     121             : 
     122         472 :     return 0;
     123             : }
     124             : 
     125          31 : static av_cold void init_qexp(void){
     126             :     int i;
     127          31 :     double v=128;
     128             : 
     129        1023 :     for(i=0; i<QROOT; i++){
     130         992 :         ff_qexp[i]= lrintf(v);
     131         992 :         v *= pow(2, 1.0 / QROOT);
     132             :     }
     133          31 : }
     134     3230914 : static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
     135             :     static const uint8_t weight[64]={
     136             :     8,7,6,5,4,3,2,1,
     137             :     7,7,0,0,0,0,0,1,
     138             :     6,0,6,0,0,0,2,0,
     139             :     5,0,0,5,0,3,0,0,
     140             :     4,0,0,0,4,0,0,0,
     141             :     3,0,0,5,0,3,0,0,
     142             :     2,0,6,0,0,0,2,0,
     143             :     1,7,0,0,0,0,0,1,
     144             :     };
     145             : 
     146             :     static const uint8_t brane[256]={
     147             :     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
     148             :     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
     149             :     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
     150             :     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
     151             :     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
     152             :     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
     153             :     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
     154             :     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
     155             :     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
     156             :     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
     157             :     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
     158             :     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
     159             :     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
     160             :     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
     161             :     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
     162             :     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
     163             :     };
     164             : 
     165             :     static const uint8_t needs[16]={
     166             :     0,1,0,0,
     167             :     2,4,2,0,
     168             :     0,1,0,0,
     169             :     15
     170             :     };
     171             : 
     172             :     int x, y, b, r, l;
     173             :     int16_t tmpIt   [64*(32+HTAPS_MAX)];
     174             :     uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
     175     3230914 :     int16_t *tmpI= tmpIt;
     176     3230914 :     uint8_t *tmp2= tmp2t[0];
     177             :     const uint8_t *hpel[11];
     178             :     av_assert2(dx<16 && dy<16);
     179     3230914 :     r= brane[dx + 16*dy]&15;
     180     3230914 :     l= brane[dx + 16*dy]>>4;
     181             : 
     182     3230914 :     b= needs[l] | needs[r];
     183     3230914 :     if(p && !p->diag_mc)
     184           0 :         b= 15;
     185             : 
     186     3230914 :     if(b&5){
     187    35788556 :         for(y=0; y < b_h+HTAPS_MAX-1; y++){
     188   183721675 :             for(x=0; x < b_w; x++){
     189   150826880 :                 int a_1=src[x + HTAPS_MAX/2-4];
     190   150826880 :                 int a0= src[x + HTAPS_MAX/2-3];
     191   150826880 :                 int a1= src[x + HTAPS_MAX/2-2];
     192   150826880 :                 int a2= src[x + HTAPS_MAX/2-1];
     193   150826880 :                 int a3= src[x + HTAPS_MAX/2+0];
     194   150826880 :                 int a4= src[x + HTAPS_MAX/2+1];
     195   150826880 :                 int a5= src[x + HTAPS_MAX/2+2];
     196   150826880 :                 int a6= src[x + HTAPS_MAX/2+3];
     197   150826880 :                 int am=0;
     198   150826880 :                 if(!p || p->fast_mc){
     199   150826880 :                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
     200   150826880 :                     tmpI[x]= am;
     201   150826880 :                     am= (am+16)>>5;
     202             :                 }else{
     203           0 :                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
     204           0 :                     tmpI[x]= am;
     205           0 :                     am= (am+32)>>6;
     206             :                 }
     207             : 
     208   150826880 :                 if(am&(~255)) am= ~(am>>31);
     209   150826880 :                 tmp2[x]= am;
     210             :             }
     211    32894795 :             tmpI+= 64;
     212    32894795 :             tmp2+= 64;
     213    32894795 :             src += stride;
     214             :         }
     215     2893761 :         src -= stride*y;
     216             :     }
     217     3230914 :     src += HTAPS_MAX/2 - 1;
     218     3230914 :     tmp2= tmp2t[1];
     219             : 
     220     3230914 :     if(b&2){
     221    10642657 :         for(y=0; y < b_h; y++){
     222    58456136 :             for(x=0; x < b_w+1; x++){
     223    49802780 :                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
     224    49802780 :                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
     225    49802780 :                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
     226    49802780 :                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
     227    49802780 :                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
     228    49802780 :                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
     229    49802780 :                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
     230    49802780 :                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
     231    49802780 :                 int am=0;
     232    49802780 :                 if(!p || p->fast_mc)
     233    49802780 :                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
     234             :                 else
     235           0 :                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
     236             : 
     237    49802780 :                 if(am&(~255)) am= ~(am>>31);
     238    49802780 :                 tmp2[x]= am;
     239             :             }
     240     8653356 :             src += stride;
     241     8653356 :             tmp2+= 64;
     242             :         }
     243     1989301 :         src -= stride*y;
     244             :     }
     245     3230914 :     src += stride*(HTAPS_MAX/2 - 1);
     246     3230914 :     tmp2= tmp2t[2];
     247     3230914 :     tmpI= tmpIt;
     248     3230914 :     if(b&4){
     249    10412970 :         for(y=0; y < b_h; y++){
     250    49178480 :             for(x=0; x < b_w; x++){
     251    40716192 :                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
     252    40716192 :                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
     253    40716192 :                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
     254    40716192 :                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
     255    40716192 :                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
     256    40716192 :                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
     257    40716192 :                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
     258    40716192 :                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
     259    40716192 :                 int am=0;
     260    40716192 :                 if(!p || p->fast_mc)
     261    40716192 :                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
     262             :                 else
     263           0 :                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
     264    40716192 :                 if(am&(~255)) am= ~(am>>31);
     265    40716192 :                 tmp2[x]= am;
     266             :             }
     267     8462288 :             tmpI+= 64;
     268     8462288 :             tmp2+= 64;
     269             :         }
     270             :     }
     271             : 
     272     3230914 :     hpel[ 0]= src;
     273     3230914 :     hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
     274     3230914 :     hpel[ 2]= src + 1;
     275             : 
     276     3230914 :     hpel[ 4]= tmp2t[1];
     277     3230914 :     hpel[ 5]= tmp2t[2];
     278     3230914 :     hpel[ 6]= tmp2t[1] + 1;
     279             : 
     280     3230914 :     hpel[ 8]= src + stride;
     281     3230914 :     hpel[ 9]= hpel[1] + 64;
     282     3230914 :     hpel[10]= hpel[8] + 1;
     283             : 
     284             : #define MC_STRIDE(x) (needs[x] ? 64 : stride)
     285             : 
     286     3230914 :     if(b==15){
     287      788734 :         int dxy = dx / 8 + dy / 8 * 4;
     288      788734 :         const uint8_t *src1 = hpel[dxy    ];
     289      788734 :         const uint8_t *src2 = hpel[dxy + 1];
     290      788734 :         const uint8_t *src3 = hpel[dxy + 4];
     291      788734 :         const uint8_t *src4 = hpel[dxy + 5];
     292      788734 :         int stride1 = MC_STRIDE(dxy);
     293      788734 :         int stride2 = MC_STRIDE(dxy + 1);
     294      788734 :         int stride3 = MC_STRIDE(dxy + 4);
     295      788734 :         int stride4 = MC_STRIDE(dxy + 5);
     296      788734 :         dx&=7;
     297      788734 :         dy&=7;
     298     4253866 :         for(y=0; y < b_h; y++){
     299    20041380 :             for(x=0; x < b_w; x++){
     300    49728744 :                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
     301    33152496 :                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
     302             :             }
     303     3465132 :             src1+=stride1;
     304     3465132 :             src2+=stride2;
     305     3465132 :             src3+=stride3;
     306     3465132 :             src4+=stride4;
     307     3465132 :             dst +=stride;
     308             :         }
     309             :     }else{
     310     2442180 :         const uint8_t *src1= hpel[l];
     311     2442180 :         const uint8_t *src2= hpel[r];
     312     2442180 :         int stride1 = MC_STRIDE(l);
     313     2442180 :         int stride2 = MC_STRIDE(r);
     314     2442180 :         int a= weight[((dx&7) + (8*(dy&7)))];
     315     2442180 :         int b= 8-a;
     316    13135368 :         for(y=0; y < b_h; y++){
     317    63515852 :             for(x=0; x < b_w; x++){
     318    52822664 :                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
     319             :             }
     320    10693188 :             src1+=stride1;
     321    10693188 :             src2+=stride2;
     322    10693188 :             dst +=stride;
     323             :         }
     324             :     }
     325     3230914 : }
     326             : 
     327     9314786 : void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, const BlockNode *block, int plane_index, int w, int h){
     328     9314786 :     if(block->type & BLOCK_INTRA){
     329             :         int x, y;
     330     1208256 :         const unsigned color  = block->color[plane_index];
     331     1208256 :         const unsigned color4 = color*0x01010101;
     332     1208256 :         if(b_w==32){
     333      622710 :             for(y=0; y < b_h; y++){
     334      603840 :                 *(uint32_t*)&dst[0 + y*stride]= color4;
     335      603840 :                 *(uint32_t*)&dst[4 + y*stride]= color4;
     336      603840 :                 *(uint32_t*)&dst[8 + y*stride]= color4;
     337      603840 :                 *(uint32_t*)&dst[12+ y*stride]= color4;
     338      603840 :                 *(uint32_t*)&dst[16+ y*stride]= color4;
     339      603840 :                 *(uint32_t*)&dst[20+ y*stride]= color4;
     340      603840 :                 *(uint32_t*)&dst[24+ y*stride]= color4;
     341      603840 :                 *(uint32_t*)&dst[28+ y*stride]= color4;
     342             :             }
     343     1189386 :         }else if(b_w==16){
     344     2069211 :             for(y=0; y < b_h; y++){
     345     1939648 :                 *(uint32_t*)&dst[0 + y*stride]= color4;
     346     1939648 :                 *(uint32_t*)&dst[4 + y*stride]= color4;
     347     1939648 :                 *(uint32_t*)&dst[8 + y*stride]= color4;
     348     1939648 :                 *(uint32_t*)&dst[12+ y*stride]= color4;
     349             :             }
     350     1059823 :         }else if(b_w==8){
     351     4638946 :             for(y=0; y < b_h; y++){
     352     4113368 :                 *(uint32_t*)&dst[0 + y*stride]= color4;
     353     4113368 :                 *(uint32_t*)&dst[4 + y*stride]= color4;
     354             :             }
     355      534245 :         }else if(b_w==4){
     356     2710587 :             for(y=0; y < b_h; y++){
     357     2177236 :                 *(uint32_t*)&dst[0 + y*stride]= color4;
     358             :             }
     359             :         }else{
     360        4410 :             for(y=0; y < b_h; y++){
     361       10548 :                 for(x=0; x < b_w; x++){
     362        7032 :                     dst[x + y*stride]= color;
     363             :                 }
     364             :             }
     365             :         }
     366             :     }else{
     367     8106530 :         uint8_t *src= s->last_picture[block->ref]->data[plane_index];
     368     8106530 :         const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
     369     8106530 :         int mx= block->mx*scale;
     370     8106530 :         int my= block->my*scale;
     371     8106530 :         const int dx= mx&15;
     372     8106530 :         const int dy= my&15;
     373     8106530 :         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
     374     8106530 :         sx += (mx>>4) - (HTAPS_MAX/2-1);
     375     8106530 :         sy += (my>>4) - (HTAPS_MAX/2-1);
     376     8106530 :         src += sx + sy*stride;
     377     8106530 :         if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
     378     7595941 :            || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
     379     1156635 :             s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
     380             :                                      stride, stride,
     381             :                                      b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
     382             :                                      sx, sy, w, h);
     383     1156635 :             src= tmp + MB_SIZE;
     384             :         }
     385             : 
     386             :         av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
     387             : 
     388             :         av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
     389     8106530 :         if(    (dx&3) || (dy&3)
     390     4895874 :             || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
     391     4895874 :             || (b_w&(b_w-1))
     392     4895874 :             || b_w == 1
     393     4895874 :             || b_h == 1
     394     4895874 :             || !s->plane[plane_index].fast_mc )
     395     3210656 :             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
     396     4895874 :         else if(b_w==32){
     397             :             int y;
     398      892374 :             for(y=0; y<b_h; y+=16){
     399      594916 :                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
     400      594916 :                 s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
     401             :             }
     402     4598416 :         }else if(b_w==b_h)
     403     4248902 :             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
     404      349514 :         else if(b_w==2*b_h){
     405      205129 :             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
     406      205129 :             s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
     407             :         }else{
     408             :             av_assert2(2*b_w==b_h);
     409      144385 :             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
     410      144385 :             s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
     411             :         }
     412             :     }
     413     9314786 : }
     414             : 
     415             : #define mca(dx,dy,b_w)\
     416             : static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
     417             :     av_assert2(h==b_w);\
     418             :     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
     419             : }
     420             : 
     421           0 : mca( 0, 0,16)
     422        5509 : mca( 8, 0,16)
     423        4983 : mca( 0, 8,16)
     424        9766 : mca( 8, 8,16)
     425           0 : mca( 0, 0,8)
     426           0 : mca( 8, 0,8)
     427           0 : mca( 0, 8,8)
     428           0 : mca( 8, 8,8)
     429             : 
     430          31 : av_cold int ff_snow_common_init(AVCodecContext *avctx){
     431          31 :     SnowContext *s = avctx->priv_data;
     432             :     int width, height;
     433             :     int i, j;
     434             : 
     435          31 :     s->avctx= avctx;
     436          31 :     s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
     437          31 :     s->spatial_decomposition_count = 1;
     438             : 
     439          31 :     ff_me_cmp_init(&s->mecc, avctx);
     440          31 :     ff_hpeldsp_init(&s->hdsp, avctx->flags);
     441          31 :     ff_videodsp_init(&s->vdsp, 8);
     442          31 :     ff_dwt_init(&s->dwt);
     443          31 :     ff_h264qpel_init(&s->h264qpel, 8);
     444             : 
     445             : #define mcf(dx,dy)\
     446             :     s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
     447             :     s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
     448             :         s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
     449             :     s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
     450             :     s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
     451             :         s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
     452             : 
     453          31 :     mcf( 0, 0)
     454          31 :     mcf( 4, 0)
     455          31 :     mcf( 8, 0)
     456          31 :     mcf(12, 0)
     457          31 :     mcf( 0, 4)
     458          31 :     mcf( 4, 4)
     459          31 :     mcf( 8, 4)
     460          31 :     mcf(12, 4)
     461          31 :     mcf( 0, 8)
     462          31 :     mcf( 4, 8)
     463          31 :     mcf( 8, 8)
     464          31 :     mcf(12, 8)
     465          31 :     mcf( 0,12)
     466          31 :     mcf( 4,12)
     467          31 :     mcf( 8,12)
     468          31 :     mcf(12,12)
     469             : 
     470             : #define mcfh(dx,dy)\
     471             :     s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
     472             :     s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
     473             :         mc_block_hpel ## dx ## dy ## 16;\
     474             :     s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
     475             :     s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
     476             :         mc_block_hpel ## dx ## dy ## 8;
     477             : 
     478          31 :     mcfh(0, 0)
     479          31 :     mcfh(8, 0)
     480          31 :     mcfh(0, 8)
     481          31 :     mcfh(8, 8)
     482             : 
     483          31 :     init_qexp();
     484             : 
     485             : //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
     486             : 
     487          31 :     width= s->avctx->width;
     488          31 :     height= s->avctx->height;
     489             : 
     490          31 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
     491          31 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
     492          31 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
     493          31 :     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
     494          31 :     FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
     495             : 
     496         279 :     for(i=0; i<MAX_REF_FRAMES; i++) {
     497        2232 :         for(j=0; j<MAX_REF_FRAMES; j++)
     498        1984 :             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
     499         248 :         s->last_picture[i] = av_frame_alloc();
     500         248 :         if (!s->last_picture[i])
     501           0 :             goto fail;
     502             :     }
     503             : 
     504          31 :     s->mconly_picture = av_frame_alloc();
     505          31 :     s->current_picture = av_frame_alloc();
     506          31 :     if (!s->mconly_picture || !s->current_picture)
     507             :         goto fail;
     508             : 
     509          31 :     return 0;
     510           0 : fail:
     511           0 :     return AVERROR(ENOMEM);
     512             : }
     513             : 
     514         971 : int ff_snow_common_init_after_header(AVCodecContext *avctx) {
                           /*
                            * Set up the state that depends on avctx->width, avctx->height and
                            * avctx->pix_fmt, and on the SnowContext fields read below
                            * (nb_planes, chroma_h_shift/chroma_v_shift,
                            * spatial_decomposition_count).
                            *
                            * On the first call (s->scratchbuf still NULL) it also obtains a
                            * buffer for s->mconly_picture and allocates s->scratchbuf and
                            * s->emu_edge_buffer.  On every call it stores each plane's
                            * width/height and fills in every SubBand descriptor, reallocating
                            * its x_coeff array.
                            *
                            * Returns 0 on success, the negative ff_get_buffer() result if that
                            * call fails, AVERROR_INVALIDDATA if avctx->pix_fmt differs from
                            * s->mconly_picture->format, or AVERROR(ENOMEM) via the fail label.
                            */
     515         971 :     SnowContext *s = avctx->priv_data;
     516             :     int plane_index, level, orientation;
     517             :     int ret, emu_buf_size;
     518             : 
                           /* One-time allocations, keyed on scratchbuf not being set yet. */
     519         971 :     if(!s->scratchbuf) {
     520          31 :         if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
     521             :                                  AV_GET_BUFFER_FLAG_REF)) < 0)
     522           0 :             return ret;
                               /*
                                * Both buffers use a row size of
                                * max(mconly_picture->linesize[0], 2*width+256).
                                * NOTE(review): the macros presumably jump to fail when the
                                * allocation fails; if only the second one fails, scratchbuf is
                                * already set and this block is skipped on a later call, leaving
                                * emu_edge_buffer unallocated -- confirm callers treat the error
                                * as fatal.
                                */
     523          31 :         FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
     524          31 :         emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
     525          31 :         FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
     526             :     }
     527             : 
     528         971 :     if(s->mconly_picture->format != avctx->pix_fmt) {
     529           0 :         av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
     530           0 :         return AVERROR_INVALIDDATA;
     531             :     }
     532             : 
     533        3884 :     for(plane_index=0; plane_index < s->nb_planes; plane_index++){
     534        2913 :         int w= s->avctx->width;
     535        2913 :         int h= s->avctx->height;
     536             : 
                               /* Plane 0 keeps the full frame size; the other planes are
                                * shrunk by the chroma shifts, rounding up. */
     537        2913 :         if(plane_index){
     538        1942 :             w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
     539        1942 :             h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
     540             :         }
     541        2913 :         s->plane[plane_index].width = w;
     542        2913 :         s->plane[plane_index].height= h;
     543             : 
                               /*
                                * Walk from the highest level index down to 0.  w and h are
                                * halved (rounding up) after each level, so level 0 ends up with
                                * the smallest bands.  Level 0 has orientations 0..3, every other
                                * level only 1..3.
                                */
     544       17478 :         for(level=s->spatial_decomposition_count-1; level>=0; level--){
     545       61173 :             for(orientation=level ? 1 : 0; orientation<4; orientation++){
     546       46608 :                 SubBand *b= &s->plane[plane_index].band[level][orientation];
     547             : 
                                       /* All bands index into the one shared DWT buffer; the row
                                        * stride is the plane width scaled by 2^(count - level). */
     548       46608 :                 b->buf= s->spatial_dwt_buffer;
     549       46608 :                 b->level= level;
     550       46608 :                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                                       /* Even orientations get the rounded-up half of w, odd ones
                                        * the rounded-down half; likewise orientations 0/1 versus
                                        * 2/3 for h. */
     551       46608 :                 b->width = (w + !(orientation&1))>>1;
     552       46608 :                 b->height= (h + !(orientation>1))>>1;
     553             : 
     554       46608 :                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
     555       46608 :                 b->buf_x_offset = 0;
     556       46608 :                 b->buf_y_offset = 0;
     557             : 
                                       /* Odd orientations start (w+1)>>1 samples to the right. */
     558       46608 :                 if(orientation&1){
     559       29130 :                     b->buf += (w+1)>>1;
     560       29130 :                     b->buf_x_offset = (w+1)>>1;
     561             :                 }
                                       /* Orientations 2 and 3 start half a band stride further on. */
     562       46608 :                 if(orientation>1){
     563       29130 :                     b->buf += b->stride>>1;
     564       29130 :                     b->buf_y_offset = b->stride_line >> 1;
     565             :                 }
                                       /* Same element offset as buf, but into the IDWT buffer. */
     566       46608 :                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
     567             : 
                                       /* Every band above level 0 links to the band with the same
                                        * orientation one level down. */
     568       46608 :                 if(level)
     569       34956 :                     b->parent= &s->plane[plane_index].band[level-1][orientation];
     570             :                 //FIXME avoid this realloc
                                       /* NOTE(review): the element count below is computed in int
                                        * arithmetic before being passed on -- confirm width/height
                                        * are bounded elsewhere so it cannot overflow. */
     571       46608 :                 av_freep(&b->x_coeff);
     572       46608 :                 b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
     573       46608 :                 if (!b->x_coeff)
     574           0 :                     goto fail;
     575             :             }
     576       14565 :             w= (w+1)>>1;
     577       14565 :             h= (h+1)>>1;
     578             :         }
     579             :     }
     580             : 
     581         971 :     return 0;
     582           0 : fail:
     583           0 :     return AVERROR(ENOMEM);
     584             : }
     585             : 
     586             : #define USE_HALFPEL_PLANE 0 /* compile-time switch: while 0, ff_snow_frame_start() never calls halfpel_interpol() */
     587             : 
     588           0 : static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
                           /*
                            * Build filtered copies of every plane of frame, indexed as
                            * halfpel[which][plane]:
                            *   [0] aliases frame->data[p] (no copy),
                            *   [1] horizontal filter of the source plane,
                            *   [2] vertical filter of the source plane,
                            *   [3] vertical filter of [1].
                            * Buffers [1..3] are requested as ls x (h + 2*EDGE_WIDTH) and the
                            * stored pointers are advanced by EDGE_WIDTH*(1+ls), so whoever frees
                            * them must subtract that offset first.
                            * The filter is the 6-tap kernel (1, -5, 20, 20, -5, 1) with +16
                            * rounding and a right shift by 5.
                            *
                            * Returns 0, or AVERROR(ENOMEM) if an allocation for a plane fails.
                            */
     589             :     int p,x,y;
     590             : 
     591           0 :     for(p=0; p < s->nb_planes; p++){
     592           0 :         int is_chroma= !!p;
     593           0 :         int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
     594           0 :         int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
     595           0 :         int ls= frame->linesize[p];
     596           0 :         uint8_t *src= frame->data[p];
     597             : 
     598           0 :         halfpel[1][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
     599           0 :         halfpel[2][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
     600           0 :         halfpel[3][p] = av_malloc_array(ls, (h + 2 * EDGE_WIDTH));
                               /* NOTE(review): only the current plane's buffers are released
                                * here; buffers stored for earlier planes stay in halfpel[][]
                                * (already offset) -- confirm the caller cleans them up. */
     601           0 :         if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) {
     602           0 :             av_freep(&halfpel[1][p]);
     603           0 :             av_freep(&halfpel[2][p]);
     604           0 :             av_freep(&halfpel[3][p]);
     605           0 :             return AVERROR(ENOMEM);
     606             :         }
                               /* Skip EDGE_WIDTH rows plus EDGE_WIDTH columns of leading margin. */
     607           0 :         halfpel[1][p] += EDGE_WIDTH * (1 + ls);
     608           0 :         halfpel[2][p] += EDGE_WIDTH * (1 + ls);
     609           0 :         halfpel[3][p] += EDGE_WIDTH * (1 + ls);
     610             : 
     611           0 :         halfpel[0][p]= src;
                               /*
                                * Horizontal pass.  Reads src[i-2] .. src[i+3], i.e. up to 2
                                * samples left and 3 right of the w x h area.
                                * NOTE(review): the int result is stored into a uint8_t without
                                * clipping in all three passes -- confirm that is intended.
                                */
     612           0 :         for(y=0; y<h; y++){
     613           0 :             for(x=0; x<w; x++){
     614           0 :                 int i= y*ls + x;
     615             : 
     616           0 :                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
     617             :             }
     618             :         }
                               /* Vertical pass on the source: reads rows y-2 .. y+3. */
     619           0 :         for(y=0; y<h; y++){
     620           0 :             for(x=0; x<w; x++){
     621           0 :                 int i= y*ls + x;
     622             : 
     623           0 :                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
     624             :             }
     625             :         }
                               /*
                                * Vertical pass on the horizontally filtered plane.
                                * NOTE(review): this reads rows -2 .. h+2 of halfpel[1][p], but
                                * this function only writes rows 0 .. h-1 of it -- see the
                                * border FIXME below.
                                */
     626           0 :         src= halfpel[1][p];
     627           0 :         for(y=0; y<h; y++){
     628           0 :             for(x=0; x<w; x++){
     629           0 :                 int i= y*ls + x;
     630             : 
     631           0 :                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
     632             :             }
     633             :         }
     634             : 
     635             : //FIXME border!
     636             :     }
     637           0 :     return 0;
     638             : }
     639             : 
     640        1942 : void ff_snow_release_buffer(AVCodecContext *avctx)
     641             : {
                           /*
                            * Release the frame held in the last reference slot,
                            * s->last_picture[s->max_ref_frames-1], if it currently has data,
                            * together with any halfpel planes stored for that slot.
                            * Does nothing when that slot's data[0] is NULL.
                            */
     642        1942 :     SnowContext *s = avctx->priv_data;
     643             :     int i;
     644             : 
     645        1942 :     if(s->last_picture[s->max_ref_frames-1]->data[0]){
     646         940 :         av_frame_unref(s->last_picture[s->max_ref_frames-1]);
                               /*
                                * i enumerates the 9 entries [1..3][0..2] of the slot; entry [0]
                                * is not freed here.  The stored pointers are rewound by
                                * EDGE_WIDTH*(1+linesize), the same form of offset that
                                * halfpel_interpol() adds after allocating.
                                * NOTE(review): the rewind uses s->current_picture's linesize,
                                * while the offset was applied with the linesize of the frame
                                * passed to halfpel_interpol() -- confirm they always match.
                                */
     647        9400 :         for(i=0; i<9; i++)
     648        8460 :             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) {
     649           0 :                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3]));
     650           0 :                 s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] = NULL;
     651             :             }
     652             :     }
     653        1942 : }
     654             : 
     655         971 : int ff_snow_frame_start(SnowContext *s){
                           /*
                            * Rotate the reference list for a new frame: release the last
                            * reference slot, shift every reference one slot back, make the
                            * previous current picture reference 0, and reuse the released frame
                            * as the new current picture.  Then recompute s->ref_frames and get
                            * a buffer for the new current picture.
                            *
                            * Returns 0 on success, AVERROR_INVALIDDATA if a non-keyframe has no
                            * usable reference, or the negative result of halfpel_interpol() /
                            * ff_snow_get_buffer() if one of them fails.
                            */
     656             :    AVFrame *tmp;
     657             :    int i, ret;
     658             : 
     659         971 :     ff_snow_release_buffer(s->avctx);
     660             : 
                           /* Keep the just-released frame object so it can be recycled below. */
     661         971 :     tmp= s->last_picture[s->max_ref_frames-1];
     662         971 :     for(i=s->max_ref_frames-1; i>0; i--)
     663           0 :         s->last_picture[i] = s->last_picture[i-1];
                           /* Shift the halfpel tables the same way; each slot is 4*4 pointers. */
     664         971 :     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
                           /*
                            * NOTE(review): if this early return is ever taken with
                            * max_ref_frames > 1, last_picture[] has already been shifted but
                            * tmp has not been stored back -- confirm before setting
                            * USE_HALFPEL_PLANE to a non-zero value.
                            */
     665             :     if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
     666             :         if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
     667             :             return ret;
     668             :     }
     669         971 :     s->last_picture[0] = s->current_picture;
     670         971 :     s->current_picture = tmp;
     671             : 
     672         971 :     if(s->keyframe){
     673         103 :         s->ref_frames= 0;
     674             :     }else{
                               /*
                                * Count the leading references that hold data, stopping once the
                                * previously counted reference was a keyframe.  This inner i
                                * shadows the function-level i.
                                */
     675             :         int i;
     676        1736 :         for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
     677         868 :             if(i && s->last_picture[i-1]->key_frame)
     678           0 :                 break;
     679         868 :         s->ref_frames= i;
     680         868 :         if(s->ref_frames==0){
     681           0 :             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
     682           0 :             return AVERROR_INVALIDDATA;
     683             :         }
     684             :     }
     685         971 :     if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
     686           0 :         return ret;
     687             : 
     688         971 :     s->current_picture->key_frame= s->keyframe;
     689             : 
     690         971 :     return 0;
     691             : }
     692             : 
     693          31 : av_cold void ff_snow_common_end(SnowContext *s)
     694             : {
                           /*
                            * Free everything this context owns: the DWT/IDWT work buffers, the
                            * run buffer, the motion-estimation scratch areas under s->m, the
                            * block array, scratchbuf/emu_edge_buffer, the per-reference
                            * tables and frames, every SubBand's x_coeff, and finally the
                            * mconly and current pictures.
                            * The loops run over the MAX_* bounds rather than the counts
                            * currently in use, so every slot is visited.
                            */
     695             :     int plane_index, level, orientation, i;
     696             : 
     697          31 :     av_freep(&s->spatial_dwt_buffer);
     698          31 :     av_freep(&s->temp_dwt_buffer);
     699          31 :     av_freep(&s->spatial_idwt_buffer);
     700          31 :     av_freep(&s->temp_idwt_buffer);
     701          31 :     av_freep(&s->run_buffer);
     702             : 
                           /* me.temp is only cleared, not freed -- presumably it aliases
                            * me.scratchpad; TODO confirm against where it is assigned. */
     703          31 :     s->m.me.temp= NULL;
     704          31 :     av_freep(&s->m.me.scratchpad);
     705          31 :     av_freep(&s->m.me.map);
     706          31 :     av_freep(&s->m.me.score_map);
     707          31 :     av_freep(&s->m.sc.obmc_scratchpad);
     708             : 
     709          31 :     av_freep(&s->block);
     710          31 :     av_freep(&s->scratchbuf);
     711          31 :     av_freep(&s->emu_edge_buffer);
     712             : 
     713         279 :     for(i=0; i<MAX_REF_FRAMES; i++){
     714         248 :         av_freep(&s->ref_mvs[i]);
     715         248 :         av_freep(&s->ref_scores[i]);
                               /*
                                * Sanity check: a reference that still holds data must not share
                                * data[0] with the current picture.
                                * NOTE(review): s->current_picture is dereferenced here without
                                * the NULL check that last_picture[i] gets -- confirm it cannot
                                * be NULL when a reference still has data.
                                */
     716         248 :         if(s->last_picture[i] && s->last_picture[i]->data[0]) {
     717           0 :             av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
     718             :         }
     719         248 :         av_frame_free(&s->last_picture[i]);
     720             :     }
     721             : 
                           /* Same band enumeration as ff_snow_common_init_after_header():
                            * orientation 0 exists only at level 0. */
     722         155 :     for(plane_index=0; plane_index < MAX_PLANES; plane_index++){
     723        1116 :         for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
     724        4092 :             for(orientation=level ? 1 : 0; orientation<4; orientation++){
     725        3100 :                 SubBand *b= &s->plane[plane_index].band[level][orientation];
     726             : 
     727        3100 :                 av_freep(&b->x_coeff);
     728             :             }
     729             :         }
     730             :     }
     731          31 :     av_frame_free(&s->mconly_picture);
     732          31 :     av_frame_free(&s->current_picture);
     733          31 : }

Generated by: LCOV version 1.13