LCOV - code coverage report
Current view: top level - libavcodec - rv40dsp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 197 201 98.0 %
Date: 2017-12-16 13:57:32 Functions: 68 82 82.9 %

          Line data    Source code
       1             : /*
       2             :  * RV40 decoder motion compensation functions
       3             :  * Copyright (c) 2008 Konstantin Shishkov
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : /**
      23             :  * @file
      24             :  * RV40 decoder motion compensation functions
      25             :  */
      26             : 
      27             : #include "libavutil/common.h"
      28             : #include "libavutil/intreadwrite.h"
      29             : #include "avcodec.h"
      30             : #include "h264qpel.h"
      31             : #include "mathops.h"
      32             : #include "pixels.h"
      33             : #include "rnd_avg.h"
      34             : #include "rv34dsp.h"
      35             : #include "libavutil/avassert.h"
      36             : 
      37             : #define RV40_LOWPASS(OPNAME, OP) \
      38             : static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
      39             :                                                      const int h, const int C1, const int C2, const int SHIFT){\
      40             :     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
      41             :     int i;\
      42             :     for(i = 0; i < h; i++)\
      43             :     {\
      44             :         OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      45             :         OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      46             :         OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      47             :         OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      48             :         OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      49             :         OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      50             :         OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      51             :         OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      52             :         dst += dstStride;\
      53             :         src += srcStride;\
      54             :     }\
      55             : }\
      56             : \
      57             : static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
      58             :                                            const int w, const int C1, const int C2, const int SHIFT){\
      59             :     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
      60             :     int i;\
      61             :     for(i = 0; i < w; i++)\
      62             :     {\
      63             :         const int srcB  = src[-2*srcStride];\
      64             :         const int srcA  = src[-1*srcStride];\
      65             :         const int src0  = src[0 *srcStride];\
      66             :         const int src1  = src[1 *srcStride];\
      67             :         const int src2  = src[2 *srcStride];\
      68             :         const int src3  = src[3 *srcStride];\
      69             :         const int src4  = src[4 *srcStride];\
      70             :         const int src5  = src[5 *srcStride];\
      71             :         const int src6  = src[6 *srcStride];\
      72             :         const int src7  = src[7 *srcStride];\
      73             :         const int src8  = src[8 *srcStride];\
      74             :         const int src9  = src[9 *srcStride];\
      75             :         const int src10 = src[10*srcStride];\
      76             :         OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      77             :         OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      78             :         OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      79             :         OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      80             :         OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      81             :         OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      82             :         OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      83             :         OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
      84             :         dst++;\
      85             :         src++;\
      86             :     }\
      87             : }\
      88             : \
      89             : static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
      90             :                                             const int w, const int C1, const int C2, const int SHIFT){\
      91             :     OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
      92             :     OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
      93             :     src += 8*srcStride;\
      94             :     dst += 8*dstStride;\
      95             :     OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
      96             :     OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
      97             : }\
      98             : \
      99             : static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
     100             :                                             const int h, const int C1, const int C2, const int SHIFT){\
     101             :     OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
     102             :     OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
     103             :     src += 8*srcStride;\
     104             :     dst += 8*dstStride;\
     105             :     OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
     106             :     OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
     107             : }\
     108             : \
     109             : 
     110             : #define RV40_MC(OPNAME, SIZE) \
     111             : static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     112             : {\
     113             :     OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
     114             : }\
     115             : \
     116             : static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     117             : {\
     118             :     OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
     119             : }\
     120             : \
     121             : static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     122             : {\
     123             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
     124             : }\
     125             : \
     126             : static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     127             : {\
     128             :     uint8_t full[SIZE*(SIZE+5)];\
     129             :     uint8_t * const full_mid = full + SIZE*2;\
     130             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
     131             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
     132             : }\
     133             : \
     134             : static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     135             : {\
     136             :     uint8_t full[SIZE*(SIZE+5)];\
     137             :     uint8_t * const full_mid = full + SIZE*2;\
     138             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
     139             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
     140             : }\
     141             : \
     142             : static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     143             : {\
     144             :     uint8_t full[SIZE*(SIZE+5)];\
     145             :     uint8_t * const full_mid = full + SIZE*2;\
     146             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
     147             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
     148             : }\
     149             : \
     150             : static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     151             : {\
     152             :     uint8_t full[SIZE*(SIZE+5)];\
     153             :     uint8_t * const full_mid = full + SIZE*2;\
     154             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
     155             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
     156             : }\
     157             : \
     158             : static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     159             : {\
     160             :     uint8_t full[SIZE*(SIZE+5)];\
     161             :     uint8_t * const full_mid = full + SIZE*2;\
     162             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
     163             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
     164             : }\
     165             : \
     166             : static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     167             : {\
     168             :     uint8_t full[SIZE*(SIZE+5)];\
     169             :     uint8_t * const full_mid = full + SIZE*2;\
     170             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
     171             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
     172             : }\
     173             : \
     174             : static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     175             : {\
     176             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
     177             : }\
     178             : \
     179             : static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     180             : {\
     181             :     uint8_t full[SIZE*(SIZE+5)];\
     182             :     uint8_t * const full_mid = full + SIZE*2;\
     183             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
     184             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
     185             : }\
     186             : \
     187             : static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
     188             : {\
     189             :     uint8_t full[SIZE*(SIZE+5)];\
     190             :     uint8_t * const full_mid = full + SIZE*2;\
     191             :     put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
     192             :     OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
     193             : }\
     194             : \
     195             : 
     196             : #define op_avg(a, b)  a = (((a)+cm[b]+1)>>1)
     197             : #define op_put(a, b)  a = cm[b]
     198             : 
     199      289212 : RV40_LOWPASS(put_       , op_put)
     200       19055 : RV40_LOWPASS(avg_       , op_avg)
     201             : 
     202             : #undef op_avg
     203             : #undef op_put
     204             : 
     205       58601 : RV40_MC(put_, 8)
     206       23297 : RV40_MC(put_, 16)
     207           0 : RV40_MC(avg_, 8)
     208        3811 : RV40_MC(avg_, 16)
     209             : 
     210             : #define PIXOP2(OPNAME, OP)                                              \
     211             : static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block,           \
     212             :                                               const uint8_t *pixels,    \
     213             :                                               ptrdiff_t line_size,      \
     214             :                                               int h)                    \
     215             : {                                                                       \
     216             :     /* FIXME HIGH BIT DEPTH */                                          \
     217             :     int j;                                                              \
     218             :                                                                         \
     219             :     for (j = 0; j < 2; j++) {                                           \
     220             :         int i;                                                          \
     221             :         const uint32_t a = AV_RN32(pixels);                             \
     222             :         const uint32_t b = AV_RN32(pixels + 1);                         \
     223             :         uint32_t l0 = (a & 0x03030303UL) +                              \
     224             :                       (b & 0x03030303UL) +                              \
     225             :                            0x02020202UL;                                \
     226             :         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
     227             :                       ((b & 0xFCFCFCFCUL) >> 2);                        \
     228             :         uint32_t l1, h1;                                                \
     229             :                                                                         \
     230             :         pixels += line_size;                                            \
     231             :         for (i = 0; i < h; i += 2) {                                    \
     232             :             uint32_t a = AV_RN32(pixels);                               \
     233             :             uint32_t b = AV_RN32(pixels + 1);                           \
     234             :             l1 = (a & 0x03030303UL) +                                   \
     235             :                  (b & 0x03030303UL);                                    \
     236             :             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
     237             :                  ((b & 0xFCFCFCFCUL) >> 2);                             \
     238             :             OP(*((uint32_t *) block),                                   \
     239             :                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
     240             :             pixels += line_size;                                        \
     241             :             block  += line_size;                                        \
     242             :             a = AV_RN32(pixels);                                        \
     243             :             b = AV_RN32(pixels + 1);                                    \
     244             :             l0 = (a & 0x03030303UL) +                                   \
     245             :                  (b & 0x03030303UL) +                                   \
     246             :                       0x02020202UL;                                     \
     247             :             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
     248             :                  ((b & 0xFCFCFCFCUL) >> 2);                             \
     249             :             OP(*((uint32_t *) block),                                   \
     250             :                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
     251             :             pixels += line_size;                                        \
     252             :             block  += line_size;                                        \
     253             :         }                                                               \
     254             :         pixels += 4 - line_size * (h + 1);                              \
     255             :         block  += 4 - line_size * h;                                    \
     256             :     }                                                                   \
     257             : }                                                                       \
     258             :                                                                         \
     259             : CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c,                             \
     260             :                OPNAME ## _pixels8_xy2_8_c,                              \
     261             :                8)                                                       \
     262             : 
     263             : #define op_avg(a, b) a = rnd_avg32(a, b)
     264             : #define op_put(a, b) a = b
     265       14484 : PIXOP2(avg, op_avg)
     266       79744 : PIXOP2(put, op_put)
     267             : #undef op_avg
     268             : #undef op_put
     269             : 
     270       20935 : static void put_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
     271             : {
     272       20935 :     put_pixels16_xy2_8_c(dst, src, stride, 16);
     273       20935 : }
     274        4828 : static void avg_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
     275             : {
     276        4828 :     avg_pixels16_xy2_8_c(dst, src, stride, 16);
     277        4828 : }
     278       16939 : static void put_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
     279             : {
     280       16939 :     put_pixels8_xy2_8_c(dst, src, stride, 8);
     281       16939 : }
     282           0 : static void avg_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
     283             : {
     284           0 :     avg_pixels8_xy2_8_c(dst, src, stride, 8);
     285           0 : }
     286             : 
     287             : static const int rv40_bias[4][4] = {
     288             :     {  0, 16, 32, 16 },
     289             :     { 32, 28, 32, 28 },
     290             :     {  0, 32, 16, 32 },
     291             :     { 32, 28, 32, 28 }
     292             : };
     293             : 
     294             : #define RV40_CHROMA_MC(OPNAME, OP)\
     295             : static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst /*align 8*/,\
     296             :                                         uint8_t *src /*align 1*/,\
     297             :                                         ptrdiff_t stride, int h, int x, int y)\
     298             : {\
     299             :     const int A = (8-x) * (8-y);\
     300             :     const int B = (  x) * (8-y);\
     301             :     const int C = (8-x) * (  y);\
     302             :     const int D = (  x) * (  y);\
     303             :     int i;\
     304             :     int bias = rv40_bias[y>>1][x>>1];\
     305             :     \
     306             :     av_assert2(x<8 && y<8 && x>=0 && y>=0);\
     307             : \
     308             :     if(D){\
     309             :         for(i = 0; i < h; i++){\
     310             :             OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
     311             :             OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
     312             :             OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
     313             :             OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
     314             :             dst += stride;\
     315             :             src += stride;\
     316             :         }\
     317             :     }else{\
     318             :         const int E = B + C;\
     319             :         const ptrdiff_t step = C ? stride : 1;\
     320             :         for(i = 0; i < h; i++){\
     321             :             OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
     322             :             OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
     323             :             OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
     324             :             OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
     325             :             dst += stride;\
     326             :             src += stride;\
     327             :         }\
     328             :     }\
     329             : }\
     330             : \
     331             : static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/,\
     332             :                                         uint8_t *src/*align 1*/,\
     333             :                                         ptrdiff_t stride, int h, int x, int y)\
     334             : {\
     335             :     const int A = (8-x) * (8-y);\
     336             :     const int B = (  x) * (8-y);\
     337             :     const int C = (8-x) * (  y);\
     338             :     const int D = (  x) * (  y);\
     339             :     int i;\
     340             :     int bias = rv40_bias[y>>1][x>>1];\
     341             :     \
     342             :     av_assert2(x<8 && y<8 && x>=0 && y>=0);\
     343             : \
     344             :     if(D){\
     345             :         for(i = 0; i < h; i++){\
     346             :             OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
     347             :             OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
     348             :             OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
     349             :             OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
     350             :             OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
     351             :             OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
     352             :             OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
     353             :             OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
     354             :             dst += stride;\
     355             :             src += stride;\
     356             :         }\
     357             :     }else{\
     358             :         const int E = B + C;\
     359             :         const ptrdiff_t step = C ? stride : 1;\
     360             :         for(i = 0; i < h; i++){\
     361             :             OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
     362             :             OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
     363             :             OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
     364             :             OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
     365             :             OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
     366             :             OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
     367             :             OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
     368             :             OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
     369             :             dst += stride;\
     370             :             src += stride;\
     371             :         }\
     372             :     }\
     373             : }
     374             : 
     375             : #define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
     376             : #define op_put(a, b) a = ((b)>>6)
     377             : 
     378      749544 : RV40_CHROMA_MC(put_, op_put)
     379       18526 : RV40_CHROMA_MC(avg_, op_avg)
     380             : 
     381             : #define RV40_WEIGHT_FUNC(size) \
     382             : static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
     383             : {\
     384             :     int i, j;\
     385             : \
     386             :     for (j = 0; j < size; j++) {\
     387             :         for (i = 0; i < size; i++)\
     388             :             dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
     389             :         src1 += stride;\
     390             :         src2 += stride;\
     391             :         dst  += stride;\
     392             :     }\
     393             : }\
     394             : static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
     395             : {\
     396             :     int i, j;\
     397             : \
     398             :     for (j = 0; j < size; j++) {\
     399             :         for (i = 0; i < size; i++)\
     400             :             dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
     401             :         src1 += stride;\
     402             :         src2 += stride;\
     403             :         dst  += stride;\
     404             :     }\
     405             : }
     406             : 
     407      115964 : RV40_WEIGHT_FUNC(16)
     408      231928 : RV40_WEIGHT_FUNC(8)
     409             : 
     410             : /**
     411             :  * dither values for deblocking filter - left/top values
     412             :  */
     413             : static const uint8_t rv40_dither_l[16] = {
     414             :     0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
     415             :     0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
     416             : };
     417             : 
     418             : /**
     419             :  * dither values for deblocking filter - right/bottom values
     420             :  */
     421             : static const uint8_t rv40_dither_r[16] = {
     422             :     0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
     423             :     0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
     424             : };
     425             : 
     426             : #define CLIP_SYMM(a, b) av_clip(a, -(b), b)
     427             : /**
     428             :  * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
     429             :  */
     430     1488979 : static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
     431             :                                                    const int step,
     432             :                                                    const ptrdiff_t stride,
     433             :                                                    const int filter_p1,
     434             :                                                    const int filter_q1,
     435             :                                                    const int alpha,
     436             :                                                    const int beta,
     437             :                                                    const int lim_p0q0,
     438             :                                                    const int lim_q1,
     439             :                                                    const int lim_p1)
     440             : {
     441     1488979 :     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
     442             :     int i, t, u, diff;
     443             : 
     444     7444895 :     for (i = 0; i < 4; i++, src += stride) {
     445     5955916 :         int diff_p1p0 = src[-2*step] - src[-1*step];
     446     5955916 :         int diff_q1q0 = src[ 1*step] - src[ 0*step];
     447     5955916 :         int diff_p1p2 = src[-2*step] - src[-3*step];
     448     5955916 :         int diff_q1q2 = src[ 1*step] - src[ 2*step];
     449             : 
     450     5955916 :         t = src[0*step] - src[-1*step];
     451     5955916 :         if (!t)
     452     1305988 :             continue;
     453             : 
     454     4649928 :         u = (alpha * FFABS(t)) >> 7;
     455     4649928 :         if (u > 3 - (filter_p1 && filter_q1))
     456      515619 :             continue;
     457             : 
     458     4134309 :         t *= 1 << 2;
     459     4134309 :         if (filter_p1 && filter_q1)
     460     3431142 :             t += src[-2*step] - src[1*step];
     461             : 
     462     4134309 :         diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
     463     4134309 :         src[-1*step] = cm[src[-1*step] + diff];
     464     4134309 :         src[ 0*step] = cm[src[ 0*step] - diff];
     465             : 
     466     4134309 :         if (filter_p1 && FFABS(diff_p1p2) <= beta) {
     467     3566828 :             t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
     468     3566828 :             src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
     469             :         }
     470             : 
     471     4134309 :         if (filter_q1 && FFABS(diff_q1q2) <= beta) {
     472     3569006 :             t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
     473     3569006 :             src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
     474             :         }
     475             :     }
     476     1488979 : }
     477             : 
     478      747005 : static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
     479             :                                     const int filter_p1, const int filter_q1,
     480             :                                     const int alpha, const int beta,
     481             :                                     const int lim_p0q0, const int lim_q1,
     482             :                                     const int lim_p1)
     483             : {
     484      747005 :     rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
     485             :                           alpha, beta, lim_p0q0, lim_q1, lim_p1);
     486      747005 : }
     487             : 
     488      741974 : static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
     489             :                                     const int filter_p1, const int filter_q1,
     490             :                                     const int alpha, const int beta,
     491             :                                     const int lim_p0q0, const int lim_q1,
     492             :                                     const int lim_p1)
     493             : {
     494      741974 :     rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
     495             :                           alpha, beta, lim_p0q0, lim_q1, lim_p1);
     496      741974 : }
     497             : 
     498      328913 : static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
     499             :                                                      const int step,
     500             :                                                      const ptrdiff_t stride,
     501             :                                                      const int alpha,
     502             :                                                      const int lims,
     503             :                                                      const int dmode,
     504             :                                                      const int chroma)
     505             : {
     506             :     int i;
     507             : 
     508     1644565 :     for(i = 0; i < 4; i++, src += stride){
     509             :         int sflag, p0, q0, p1, q1;
     510     1315652 :         int t = src[0*step] - src[-1*step];
     511             : 
     512     1315652 :         if (!t)
     513      443419 :             continue;
     514             : 
     515      872233 :         sflag = (alpha * FFABS(t)) >> 7;
     516      872233 :         if (sflag > 1)
     517      128838 :             continue;
     518             : 
     519     2230185 :         p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
     520     2230185 :               26*src[ 0*step] + 25*src[ 1*step] +
     521      743395 :               rv40_dither_l[dmode + i]) >> 7;
     522             : 
     523     2230185 :         q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
     524     2230185 :               26*src[ 1*step] + 25*src[ 2*step] +
     525      743395 :               rv40_dither_r[dmode + i]) >> 7;
     526             : 
     527      743395 :         if (sflag) {
     528      193774 :             p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
     529      193774 :             q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
     530             :         }
     531             : 
     532     2230185 :         p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
     533     1486790 :               25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
     534     2230185 :         q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
     535     1486790 :               25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
     536             : 
     537      743395 :         if (sflag) {
     538      193774 :             p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
     539      193774 :             q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
     540             :         }
     541             : 
     542      743395 :         src[-2*step] = p1;
     543      743395 :         src[-1*step] = p0;
     544      743395 :         src[ 0*step] = q0;
     545      743395 :         src[ 1*step] = q1;
     546             : 
     547      743395 :         if(!chroma){
     548      996600 :             src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
     549      664400 :                             51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
     550      996600 :             src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
     551      664400 :                             51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
     552             :         }
     553             :     }
     554      328913 : }
     555             : 
     556      173906 : static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
     557             :                                       const int alpha, const int lims,
     558             :                                       const int dmode, const int chroma)
     559             : {
     560      173906 :     rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
     561      173906 : }
     562             : 
     563      155007 : static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
     564             :                                       const int alpha, const int lims,
     565             :                                       const int dmode, const int chroma)
     566             : {
     567      155007 :     rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
     568      155007 : }
     569             : 
     570     2315053 : static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
     571             :                                                       int step, ptrdiff_t stride,
     572             :                                                       int beta, int beta2,
     573             :                                                       int edge,
     574             :                                                       int *p1, int *q1)
     575             : {
     576     2315053 :     int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
     577     2315053 :     int strong0 = 0, strong1 = 0;
     578             :     uint8_t *ptr;
     579             :     int i;
     580             : 
     581    11575265 :     for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
     582     9260212 :         sum_p1p0 += ptr[-2*step] - ptr[-1*step];
     583     9260212 :         sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
     584             :     }
     585             : 
     586     2315053 :     *p1 = FFABS(sum_p1p0) < (beta << 2);
     587     2315053 :     *q1 = FFABS(sum_q1q0) < (beta << 2);
     588             : 
     589     2315053 :     if(!*p1 && !*q1)
     590      497161 :         return 0;
     591             : 
     592     1817892 :     if (!edge)
     593     1423014 :         return 0;
     594             : 
     595     1974390 :     for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
     596     1579512 :         sum_p1p2 += ptr[-2*step] - ptr[-3*step];
     597     1579512 :         sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
     598             :     }
     599             : 
     600      394878 :     strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
     601      394878 :     strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
     602             : 
     603      394878 :     return strong0 && strong1;
     604             : }
     605             : 
     606     1170505 : static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
     607             :                                        int beta, int beta2, int edge,
     608             :                                        int *p1, int *q1)
     609             : {
     610     1170505 :     return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
     611             : }
     612             : 
     613     1144548 : static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
     614             :                                        int beta, int beta2, int edge,
     615             :                                        int *p1, int *q1)
     616             : {
     617     1144548 :     return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
     618             : }
     619             : 
     620           4 : av_cold void ff_rv40dsp_init(RV34DSPContext *c)
     621             : {
     622             :     H264QpelContext qpel;
     623             : 
     624           4 :     ff_rv34dsp_init(c);
     625           4 :     ff_h264qpel_init(&qpel, 8);
     626             : 
     627           4 :     c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
     628           4 :     c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
     629           4 :     c->put_pixels_tab[0][ 2] = qpel.put_h264_qpel_pixels_tab[0][2];
     630           4 :     c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
     631           4 :     c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
     632           4 :     c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
     633           4 :     c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
     634           4 :     c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
     635           4 :     c->put_pixels_tab[0][ 8] = qpel.put_h264_qpel_pixels_tab[0][8];
     636           4 :     c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
     637           4 :     c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
     638           4 :     c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
     639           4 :     c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
     640           4 :     c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
     641           4 :     c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
     642           4 :     c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
     643           4 :     c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0];
     644           4 :     c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
     645           4 :     c->avg_pixels_tab[0][ 2] = qpel.avg_h264_qpel_pixels_tab[0][2];
     646           4 :     c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
     647           4 :     c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
     648           4 :     c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
     649           4 :     c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
     650           4 :     c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
     651           4 :     c->avg_pixels_tab[0][ 8] = qpel.avg_h264_qpel_pixels_tab[0][8];
     652           4 :     c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
     653           4 :     c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
     654           4 :     c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
     655           4 :     c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
     656           4 :     c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
     657           4 :     c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
     658           4 :     c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
     659           4 :     c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0];
     660           4 :     c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
     661           4 :     c->put_pixels_tab[1][ 2] = qpel.put_h264_qpel_pixels_tab[1][2];
     662           4 :     c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
     663           4 :     c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
     664           4 :     c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
     665           4 :     c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
     666           4 :     c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
     667           4 :     c->put_pixels_tab[1][ 8] = qpel.put_h264_qpel_pixels_tab[1][8];
     668           4 :     c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
     669           4 :     c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
     670           4 :     c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
     671           4 :     c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
     672           4 :     c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
     673           4 :     c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
     674           4 :     c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
     675           4 :     c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0];
     676           4 :     c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
     677           4 :     c->avg_pixels_tab[1][ 2] = qpel.avg_h264_qpel_pixels_tab[1][2];
     678           4 :     c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
     679           4 :     c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
     680           4 :     c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
     681           4 :     c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
     682           4 :     c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
     683           4 :     c->avg_pixels_tab[1][ 8] = qpel.avg_h264_qpel_pixels_tab[1][8];
     684           4 :     c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
     685           4 :     c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
     686           4 :     c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
     687           4 :     c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
     688           4 :     c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
     689           4 :     c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
     690           4 :     c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
     691             : 
     692           4 :     c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
     693           4 :     c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
     694           4 :     c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
     695           4 :     c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
     696             : 
     697           4 :     c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16;
     698           4 :     c->rv40_weight_pixels_tab[0][1] = rv40_weight_func_rnd_8;
     699           4 :     c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16;
     700           4 :     c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8;
     701             : 
     702           4 :     c->rv40_weak_loop_filter[0]     = rv40_h_weak_loop_filter;
     703           4 :     c->rv40_weak_loop_filter[1]     = rv40_v_weak_loop_filter;
     704           4 :     c->rv40_strong_loop_filter[0]   = rv40_h_strong_loop_filter;
     705           4 :     c->rv40_strong_loop_filter[1]   = rv40_v_strong_loop_filter;
     706           4 :     c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
     707           4 :     c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
     708             : 
     709             :     if (ARCH_AARCH64)
     710             :         ff_rv40dsp_init_aarch64(c);
     711             :     if (ARCH_ARM)
     712             :         ff_rv40dsp_init_arm(c);
     713             :     if (ARCH_X86)
     714           4 :         ff_rv40dsp_init_x86(c);
     715           4 : }

Generated by: LCOV version 1.13