LCOV - code coverage report
Current view: top level - libswscale - output.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1006 1505 66.8 %
Date: 2018-05-20 11:54:08 Functions: 104 227 45.8 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : #include <math.h>
      22             : #include <stdint.h>
      23             : #include <stdio.h>
      24             : #include <string.h>
      25             : 
      26             : #include "libavutil/attributes.h"
      27             : #include "libavutil/avutil.h"
      28             : #include "libavutil/avassert.h"
      29             : #include "libavutil/bswap.h"
      30             : #include "libavutil/cpu.h"
      31             : #include "libavutil/intreadwrite.h"
      32             : #include "libavutil/mathematics.h"
      33             : #include "libavutil/pixdesc.h"
      34             : #include "config.h"
      35             : #include "rgb2rgb.h"
      36             : #include "swscale.h"
      37             : #include "swscale_internal.h"
      38             : 
      39             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
      40             : {  1,   3,   1,   3,   1,   3,   1,   3, },
      41             : {  2,   0,   2,   0,   2,   0,   2,   0, },
      42             : {  1,   3,   1,   3,   1,   3,   1,   3, },
      43             : };
      44             : 
      45             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
      46             : {  6,   2,   6,   2,   6,   2,   6,   2, },
      47             : {  0,   4,   0,   4,   0,   4,   0,   4, },
      48             : {  6,   2,   6,   2,   6,   2,   6,   2, },
      49             : };
      50             : 
      51             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
      52             : {  8,   4,  11,   7,   8,   4,  11,   7, },
      53             : {  2,  14,   1,  13,   2,  14,   1,  13, },
      54             : { 10,   6,   9,   5,  10,   6,   9,   5, },
      55             : {  0,  12,   3,  15,   0,  12,   3,  15, },
      56             : {  8,   4,  11,   7,   8,   4,  11,   7, },
      57             : };
      58             : 
      59             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
      60             : { 17,   9,  23,  15,  16,   8,  22,  14, },
      61             : {  5,  29,   3,  27,   4,  28,   2,  26, },
      62             : { 21,  13,  19,  11,  20,  12,  18,  10, },
      63             : {  0,  24,   6,  30,   1,  25,   7,  31, },
      64             : { 16,   8,  22,  14,  17,   9,  23,  15, },
      65             : {  4,  28,   2,  26,   5,  29,   3,  27, },
      66             : { 20,  12,  18,  10,  21,  13,  19,  11, },
      67             : {  1,  25,   7,  31,   0,  24,   6,  30, },
      68             : { 17,   9,  23,  15,  16,   8,  22,  14, },
      69             : };
      70             : 
      71             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
      72             : {  0,  55,  14,  68,   3,  58,  17,  72, },
      73             : { 37,  18,  50,  32,  40,  22,  54,  35, },
      74             : {  9,  64,   5,  59,  13,  67,   8,  63, },
      75             : { 46,  27,  41,  23,  49,  31,  44,  26, },
      76             : {  2,  57,  16,  71,   1,  56,  15,  70, },
      77             : { 39,  21,  52,  34,  38,  19,  51,  33, },
      78             : { 11,  66,   7,  62,  10,  65,   6,  60, },
      79             : { 48,  30,  43,  25,  47,  29,  42,  24, },
      80             : {  0,  55,  14,  68,   3,  58,  17,  72, },
      81             : };
      82             : 
      83             : #if 1
      84             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
      85             : {117,  62, 158, 103, 113,  58, 155, 100, },
      86             : { 34, 199,  21, 186,  31, 196,  17, 182, },
      87             : {144,  89, 131,  76, 141,  86, 127,  72, },
      88             : {  0, 165,  41, 206,  10, 175,  52, 217, },
      89             : {110,  55, 151,  96, 120,  65, 162, 107, },
      90             : { 28, 193,  14, 179,  38, 203,  24, 189, },
      91             : {138,  83, 124,  69, 148,  93, 134,  79, },
      92             : {  7, 172,  48, 213,   3, 168,  45, 210, },
      93             : {117,  62, 158, 103, 113,  58, 155, 100, },
      94             : };
      95             : #elif 1
      96             : // tries to correct a gamma of 1.5
      97             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
      98             : {  0, 143,  18, 200,   2, 156,  25, 215, },
      99             : { 78,  28, 125,  64,  89,  36, 138,  74, },
     100             : { 10, 180,   3, 161,  16, 195,   8, 175, },
     101             : {109,  51,  93,  38, 121,  60, 105,  47, },
     102             : {  1, 152,  23, 210,   0, 147,  20, 205, },
     103             : { 85,  33, 134,  71,  81,  30, 130,  67, },
     104             : { 14, 190,   6, 171,  12, 185,   5, 166, },
     105             : {117,  57, 101,  44, 113,  54,  97,  41, },
     106             : {  0, 143,  18, 200,   2, 156,  25, 215, },
     107             : };
     108             : #elif 1
     109             : // tries to correct a gamma of 2.0
     110             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
     111             : {  0, 124,   8, 193,   0, 140,  12, 213, },
     112             : { 55,  14, 104,  42,  66,  19, 119,  52, },
     113             : {  3, 168,   1, 145,   6, 187,   3, 162, },
     114             : { 86,  31,  70,  21,  99,  39,  82,  28, },
     115             : {  0, 134,  11, 206,   0, 129,   9, 200, },
     116             : { 62,  17, 114,  48,  58,  16, 109,  45, },
     117             : {  5, 181,   2, 157,   4, 175,   1, 151, },
     118             : { 95,  36,  78,  26,  90,  34,  74,  24, },
     119             : {  0, 124,   8, 193,   0, 140,  12, 213, },
     120             : };
     121             : #else
     122             : // tries to correct a gamma of 2.5
     123             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
     124             : {  0, 107,   3, 187,   0, 125,   6, 212, },
     125             : { 39,   7,  86,  28,  49,  11, 102,  36, },
     126             : {  1, 158,   0, 131,   3, 180,   1, 151, },
     127             : { 68,  19,  52,  12,  81,  25,  64,  17, },
     128             : {  0, 119,   5, 203,   0, 113,   4, 195, },
     129             : { 45,   9,  96,  33,  42,   8,  91,  30, },
     130             : {  2, 172,   1, 144,   2, 165,   0, 137, },
     131             : { 77,  23,  60,  15,  72,  21,  56,  14, },
     132             : {  0, 107,   3, 187,   0, 125,   6, 212, },
     133             : };
     134             : #endif
     135             : 
     136             : #define output_pixel(pos, val, bias, signedness) \
     137             :     if (big_endian) { \
     138             :         AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
     139             :     } else { \
     140             :         AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
     141             :     }
     142             : 
     143             : static av_always_inline void
     144      253508 : yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
     145             :                          int big_endian, int output_bits)
     146             : {
     147             :     int i;
     148      253508 :     int shift = 3;
     149      253508 :     av_assert0(output_bits == 16);
     150             : 
     151    87275524 :     for (i = 0; i < dstW; i++) {
     152    87022016 :         int val = src[i] + (1 << (shift - 1));
     153    87022016 :         output_pixel(&dest[i], val, 0, uint);
     154             :     }
     155      253508 : }
     156             : 
     157             : static av_always_inline void
     158      171184 : yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
     159             :                          const int32_t **src, uint16_t *dest, int dstW,
     160             :                          int big_endian, int output_bits)
     161             : {
     162             :     int i;
     163      171184 :     int shift = 15;
     164      171184 :     av_assert0(output_bits == 16);
     165             : 
     166    52100288 :     for (i = 0; i < dstW; i++) {
     167    51929104 :         int val = 1 << (shift - 1);
     168             :         int j;
     169             : 
     170             :         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
     171             :          * filters (or anything with negative coeffs, the range can be slightly
     172             :          * wider in both directions. To account for this overflow, we subtract
     173             :          * a constant so it always fits in the signed range (assuming a
     174             :          * reasonable filterSize), and re-add that at the end. */
     175    51929104 :         val -= 0x40000000;
     176   265405520 :         for (j = 0; j < filterSize; j++)
     177   213476416 :             val += src[j][i] * (unsigned)filter[j];
     178             : 
     179    51929104 :         output_pixel(&dest[i], val, 0x8000, int);
     180             :     }
     181      171184 : }
     182             : 
     183        2116 : static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
     184             :                          const int16_t **chrUSrc, const int16_t **chrVSrc,
     185             :                          uint8_t *dest8, int chrDstW)
     186             : {
     187        2116 :     uint16_t *dest = (uint16_t*)dest8;
     188        2116 :     const int32_t **uSrc = (const int32_t **)chrUSrc;
     189        2116 :     const int32_t **vSrc = (const int32_t **)chrVSrc;
     190        2116 :     int shift = 15;
     191        2116 :     int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
     192             :     int i, j;
     193             : 
     194      366932 :     for (i = 0; i < chrDstW; i++) {
     195      364816 :         int u = 1 << (shift - 1);
     196      364816 :         int v = 1 << (shift - 1);
     197             : 
     198             :         /* See yuv2planeX_16_c_template for details. */
     199      364816 :         u -= 0x40000000;
     200      364816 :         v -= 0x40000000;
     201      839632 :         for (j = 0; j < chrFilterSize; j++) {
     202      474816 :             u += uSrc[j][i] * (unsigned)chrFilter[j];
     203      474816 :             v += vSrc[j][i] * (unsigned)chrFilter[j];
     204             :         }
     205             : 
     206      364816 :         output_pixel(&dest[2*i]  , u, 0x8000, int);
     207      364816 :         output_pixel(&dest[2*i+1], v, 0x8000, int);
     208             :     }
     209        2116 : }
     210             : 
     211             : #undef output_pixel
     212             : 
     213             : #define output_pixel(pos, val) \
     214             :     if (big_endian) { \
     215             :         AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
     216             :     } else { \
     217             :         AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
     218             :     }
     219             : 
     220             : static av_always_inline void
     221      647292 : yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
     222             :                          int big_endian, int output_bits)
     223             : {
     224             :     int i;
     225      647292 :     int shift = 15 - output_bits;
     226             : 
     227   200664876 :     for (i = 0; i < dstW; i++) {
     228   200017584 :         int val = src[i] + (1 << (shift - 1));
     229   200017584 :         output_pixel(&dest[i], val);
     230             :     }
     231      647292 : }
     232             : 
     233             : static av_always_inline void
     234     1174480 : yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
     235             :                          const int16_t **src, uint16_t *dest, int dstW,
     236             :                          int big_endian, int output_bits)
     237             : {
     238             :     int i;
     239     1174480 :     int shift = 11 + 16 - output_bits;
     240             : 
     241   387630896 :     for (i = 0; i < dstW; i++) {
     242   386456416 :         int val = 1 << (shift - 1);
     243             :         int j;
     244             : 
     245  1949242080 :         for (j = 0; j < filterSize; j++)
     246  1562785664 :             val += src[j][i] * filter[j];
     247             : 
     248   386456416 :         output_pixel(&dest[i], val);
     249             :     }
     250     1174480 : }
     251             : 
     252             : #undef output_pixel
     253             : 
     254             : #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
     255             : static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
     256             :                               uint8_t *dest, int dstW, \
     257             :                               const uint8_t *dither, int offset)\
     258             : { \
     259             :     yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
     260             :                          (uint16_t *) dest, dstW, is_be, bits); \
     261             : }\
     262             : static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
     263             :                               const int16_t **src, uint8_t *dest, int dstW, \
     264             :                               const uint8_t *dither, int offset)\
     265             : { \
     266             :     yuv2planeX_## template_size ## _c_template(filter, \
     267             :                          filterSize, (const typeX_t **) src, \
     268             :                          (uint16_t *) dest, dstW, is_be, bits); \
     269             : }
     270       59888 : yuv2NBPS( 9, BE, 1, 10, int16_t)
     271       67376 : yuv2NBPS( 9, LE, 0, 10, int16_t)
     272       68152 : yuv2NBPS(10, BE, 1, 10, int16_t)
     273     1427668 : yuv2NBPS(10, LE, 0, 10, int16_t)
     274       40268 : yuv2NBPS(12, BE, 1, 10, int16_t)
     275       90668 : yuv2NBPS(12, LE, 0, 10, int16_t)
     276       32004 : yuv2NBPS(14, BE, 1, 10, int16_t)
     277       35748 : yuv2NBPS(14, LE, 0, 10, int16_t)
     278       86088 : yuv2NBPS(16, BE, 1, 16, int32_t)
     279      338604 : yuv2NBPS(16, LE, 0, 16, int32_t)
     280             : 
     281     9879534 : static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
     282             :                            const int16_t **src, uint8_t *dest, int dstW,
     283             :                            const uint8_t *dither, int offset)
     284             : {
     285             :     int i;
     286  6277790784 :     for (i=0; i<dstW; i++) {
     287  6267911250 :         int val = dither[(i + offset) & 7] << 12;
     288             :         int j;
     289 25738899927 :         for (j=0; j<filterSize; j++)
     290 19470988677 :             val += src[j][i] * filter[j];
     291             : 
     292  6267911250 :         dest[i]= av_clip_uint8(val>>19);
     293             :     }
     294     9879534 : }
     295             : 
     296     5227661 : static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
     297             :                            const uint8_t *dither, int offset)
     298             : {
     299             :     int i;
     300  1756598678 :     for (i=0; i<dstW; i++) {
     301  1751371017 :         int val = (src[i] + dither[(i + offset) & 7]) >> 7;
     302  1751371017 :         dest[i]= av_clip_uint8(val);
     303             :     }
     304     5227661 : }
     305             : 
     306         100 : static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
     307             :                         const int16_t **chrUSrc, const int16_t **chrVSrc,
     308             :                         uint8_t *dest, int chrDstW)
     309             : {
     310         100 :     enum AVPixelFormat dstFormat = c->dstFormat;
     311         100 :     const uint8_t *chrDither = c->chrDither8;
     312             :     int i;
     313             : 
     314         100 :     if (dstFormat == AV_PIX_FMT_NV12)
     315        5050 :         for (i=0; i<chrDstW; i++) {
     316        5000 :             int u = chrDither[i & 7] << 12;
     317        5000 :             int v = chrDither[(i + 3) & 7] << 12;
     318             :             int j;
     319       65000 :             for (j=0; j<chrFilterSize; j++) {
     320       60000 :                 u += chrUSrc[j][i] * chrFilter[j];
     321       60000 :                 v += chrVSrc[j][i] * chrFilter[j];
     322             :             }
     323             : 
     324        5000 :             dest[2*i]= av_clip_uint8(u>>19);
     325        5000 :             dest[2*i+1]= av_clip_uint8(v>>19);
     326             :         }
     327             :     else
     328        5050 :         for (i=0; i<chrDstW; i++) {
     329        5000 :             int u = chrDither[i & 7] << 12;
     330        5000 :             int v = chrDither[(i + 3) & 7] << 12;
     331             :             int j;
     332       65000 :             for (j=0; j<chrFilterSize; j++) {
     333       60000 :                 u += chrUSrc[j][i] * chrFilter[j];
     334       60000 :                 v += chrVSrc[j][i] * chrFilter[j];
     335             :             }
     336             : 
     337        5000 :             dest[2*i]= av_clip_uint8(v>>19);
     338        5000 :             dest[2*i+1]= av_clip_uint8(u>>19);
     339             :         }
     340         100 : }
     341             : 
     342             : 
     343             : #define output_pixel(pos, val) \
     344             :     if (big_endian) { \
     345             :         AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
     346             :     } else { \
     347             :         AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
     348             :     }
     349             : 
     350        4032 : static void yuv2p010l1_c(const int16_t *src,
     351             :                          uint16_t *dest, int dstW,
     352             :                          int big_endian)
     353             : {
     354             :     int i;
     355        4032 :     int shift = 5;
     356             : 
     357     1423296 :     for (i = 0; i < dstW; i++) {
     358     1419264 :         int val = src[i] + (1 << (shift - 1));
     359     1419264 :         output_pixel(&dest[i], val);
     360             :     }
     361        4032 : }
     362             : 
     363         200 : static void yuv2p010lX_c(const int16_t *filter, int filterSize,
     364             :                          const int16_t **src, uint16_t *dest, int dstW,
     365             :                          int big_endian)
     366             : {
     367             :     int i, j;
     368         200 :     int shift = 17;
     369             : 
     370       40200 :     for (i = 0; i < dstW; i++) {
     371       40000 :         int val = 1 << (shift - 1);
     372             : 
     373      520000 :         for (j = 0; j < filterSize; j++)
     374      480000 :             val += src[j][i] * filter[j];
     375             : 
     376       40000 :         output_pixel(&dest[i], val);
     377             :     }
     378         200 : }
     379             : 
     380        2116 : static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
     381             :                          const int16_t **chrUSrc, const int16_t **chrVSrc,
     382             :                          uint8_t *dest8, int chrDstW)
     383             : {
     384        2116 :     uint16_t *dest = (uint16_t*)dest8;
     385        2116 :     int shift = 17;
     386        2116 :     int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
     387             :     int i, j;
     388             : 
     389      366932 :     for (i = 0; i < chrDstW; i++) {
     390      364816 :         int u = 1 << (shift - 1);
     391      364816 :         int v = 1 << (shift - 1);
     392             : 
     393      839632 :         for (j = 0; j < chrFilterSize; j++) {
     394      474816 :             u += chrUSrc[j][i] * chrFilter[j];
     395      474816 :             v += chrVSrc[j][i] * chrFilter[j];
     396             :         }
     397             : 
     398      364816 :         output_pixel(&dest[2*i]  , u);
     399      364816 :         output_pixel(&dest[2*i+1], v);
     400             :     }
     401        2116 : }
     402             : 
     403           0 : static void yuv2p010l1_LE_c(const int16_t *src,
     404             :                             uint8_t *dest, int dstW,
     405             :                             const uint8_t *dither, int offset)
     406             : {
     407           0 :     yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0);
     408           0 : }
     409             : 
     410        4032 : static void yuv2p010l1_BE_c(const int16_t *src,
     411             :                             uint8_t *dest, int dstW,
     412             :                             const uint8_t *dither, int offset)
     413             : {
     414        4032 :     yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1);
     415        4032 : }
     416             : 
     417         100 : static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
     418             :                             const int16_t **src, uint8_t *dest, int dstW,
     419             :                             const uint8_t *dither, int offset)
     420             : {
     421         100 :     yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0);
     422         100 : }
     423             : 
     424         100 : static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
     425             :                             const int16_t **src, uint8_t *dest, int dstW,
     426             :                             const uint8_t *dither, int offset)
     427             : {
     428         100 :     yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
     429         100 : }
     430             : 
     431             : #undef output_pixel
     432             : 
     433             : 
     434             : #define accumulate_bit(acc, val) \
     435             :     acc <<= 1; \
     436             :     acc |= (val) >= 234
     437             : #define output_pixel(pos, acc) \
     438             :     if (target == AV_PIX_FMT_MONOBLACK) { \
     439             :         pos = acc; \
     440             :     } else { \
     441             :         pos = ~acc; \
     442             :     }
     443             : 
     444             : static av_always_inline void
     445       71308 : yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
     446             :                       const int16_t **lumSrc, int lumFilterSize,
     447             :                       const int16_t *chrFilter, const int16_t **chrUSrc,
     448             :                       const int16_t **chrVSrc, int chrFilterSize,
     449             :                       const int16_t **alpSrc, uint8_t *dest, int dstW,
     450             :                       int y, enum AVPixelFormat target)
     451             : {
     452       71308 :     const uint8_t * const d128 = ff_dither_8x8_220[y&7];
     453             :     int i;
     454       71308 :     unsigned acc = 0;
     455       71308 :     int err = 0;
     456             : 
     457    12336016 :     for (i = 0; i < dstW; i += 2) {
     458             :         int j;
     459    12264708 :         int Y1 = 1 << 18;
     460    12264708 :         int Y2 = 1 << 18;
     461             : 
     462    24749416 :         for (j = 0; j < lumFilterSize; j++) {
     463    12484708 :             Y1 += lumSrc[j][i]   * lumFilter[j];
     464    12484708 :             Y2 += lumSrc[j][i+1] * lumFilter[j];
     465             :         }
     466    12264708 :         Y1 >>= 19;
     467    12264708 :         Y2 >>= 19;
     468    12264708 :         if ((Y1 | Y2) & 0x100) {
     469         376 :             Y1 = av_clip_uint8(Y1);
     470         376 :             Y2 = av_clip_uint8(Y2);
     471             :         }
     472    12264708 :         if (c->dither == SWS_DITHER_ED) {
     473           0 :             Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     474           0 :             c->dither_error[0][i] = err;
     475           0 :             acc = 2*acc + (Y1 >= 128);
     476           0 :             Y1 -= 220*(acc&1);
     477             : 
     478           0 :             err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
     479           0 :             c->dither_error[0][i+1] = Y1;
     480           0 :             acc = 2*acc + (err >= 128);
     481           0 :             err -= 220*(acc&1);
     482             :         } else {
     483    12264708 :             accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
     484    12264708 :             accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
     485             :         }
     486    12264708 :         if ((i & 7) == 6) {
     487     3065752 :             output_pixel(*dest++, acc);
     488             :         }
     489             :     }
     490       71308 :     c->dither_error[0][i] = err;
     491             : 
     492       71308 :     if (i & 6) {
     493        1700 :         output_pixel(*dest, acc);
     494             :     }
     495       71308 : }
     496             : 
     497             : static av_always_inline void
     498           0 : yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
     499             :                       const int16_t *ubuf[2], const int16_t *vbuf[2],
     500             :                       const int16_t *abuf[2], uint8_t *dest, int dstW,
     501             :                       int yalpha, int uvalpha, int y,
     502             :                       enum AVPixelFormat target)
     503             : {
     504           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
     505           0 :     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
     506           0 :     int  yalpha1 = 4096 - yalpha;
     507             :     int i;
     508             :     av_assert2(yalpha  <= 4096U);
     509             : 
     510           0 :     if (c->dither == SWS_DITHER_ED) {
     511           0 :         int err = 0;
     512           0 :         int acc = 0;
     513           0 :         for (i = 0; i < dstW; i +=2) {
     514             :             int Y;
     515             : 
     516           0 :             Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
     517           0 :             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     518           0 :             c->dither_error[0][i] = err;
     519           0 :             acc = 2*acc + (Y >= 128);
     520           0 :             Y -= 220*(acc&1);
     521             : 
     522           0 :             err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
     523           0 :             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
     524           0 :             c->dither_error[0][i+1] = Y;
     525           0 :             acc = 2*acc + (err >= 128);
     526           0 :             err -= 220*(acc&1);
     527             : 
     528           0 :             if ((i & 7) == 6)
     529           0 :                 output_pixel(*dest++, acc);
     530             :         }
     531           0 :         c->dither_error[0][i] = err;
     532             :     } else {
     533           0 :     for (i = 0; i < dstW; i += 8) {
     534           0 :         int Y, acc = 0;
     535             : 
     536           0 :         Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
     537           0 :         accumulate_bit(acc, Y + d128[0]);
     538           0 :         Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
     539           0 :         accumulate_bit(acc, Y + d128[1]);
     540           0 :         Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
     541           0 :         accumulate_bit(acc, Y + d128[2]);
     542           0 :         Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
     543           0 :         accumulate_bit(acc, Y + d128[3]);
     544           0 :         Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
     545           0 :         accumulate_bit(acc, Y + d128[4]);
     546           0 :         Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
     547           0 :         accumulate_bit(acc, Y + d128[5]);
     548           0 :         Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
     549           0 :         accumulate_bit(acc, Y + d128[6]);
     550           0 :         Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
     551           0 :         accumulate_bit(acc, Y + d128[7]);
     552             : 
     553           0 :         output_pixel(*dest++, acc);
     554             :     }
     555             :     }
     556           0 : }
     557             : /**
 * Convert one line of luma taken from a single source line (buf0) to 1 bit
 * per pixel output, packing eight pixels into each destination byte.
 *
 * Two paths are selected by c->dither:
 *  - SWS_DITHER_ED: error diffusion. The value carried over from the previous
 *    pixel is weighted 7/16 and three entries of c->dither_error[0] are
 *    weighted 1/16, 5/16 and 3/16. c->dither_error[0] is rewritten while the
 *    line is processed, and entries up to index i + 3 are read.
 *  - otherwise: every sample is offset by an entry of the ff_dither_8x8_220
 *    row chosen by (y & 7) before it is handed to accumulate_bit().
 *
 * accumulate_bit() and output_pixel() are macros defined before this function
 * (outside this excerpt); presumably output_pixel() is where target selects
 * the output polarity -- TODO confirm against the macro definition.
 *
 * ubuf, vbuf, abuf0 and uvalpha are not read by this function.
 * The non-ED loop always reads buf0[i] .. buf0[i + 7], so buf0 has to be
 * readable up to the next multiple of 8 at or past dstW.
 */
     558             : static av_always_inline void
     559        9600 : yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
     560             :                       const int16_t *ubuf[2], const int16_t *vbuf[2],
     561             :                       const int16_t *abuf0, uint8_t *dest, int dstW,
     562             :                       int uvalpha, int y, enum AVPixelFormat target)
     563             : {
     564        9600 :     const uint8_t * const d128 = ff_dither_8x8_220[y & 7]; /* dither row picked by the low 3 bits of y */
     565             :     int i;
     566             : 
     567        9600 :     if (c->dither == SWS_DITHER_ED) {
     568           0 :         int err = 0;
     569           0 :         int acc = 0;
     570           0 :         for (i = 0; i < dstW; i +=2) { /* two pixels per iteration; Y and err swap roles */
     571             :             int Y;
     572             : 
     573           0 :             Y = ((buf0[i + 0] + 64) >> 7); /* (x + 64) >> 7: drop 7 bits with rounding */
     574           0 :             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     575           0 :             c->dither_error[0][i] = err; /* store the value carried in from the previous pixel */
     576           0 :             acc = 2*acc + (Y >= 128); /* shift in one output bit, threshold 128 */
     577           0 :             Y -= 220*(acc&1); /* subtract 220 when the bit just emitted is 1 */
     578             : 
     579           0 :             err = ((buf0[i + 1] + 64) >> 7);
     580           0 :             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
     581           0 :             c->dither_error[0][i+1] = Y;
     582           0 :             acc = 2*acc + (err >= 128);
     583           0 :             err -= 220*(acc&1);
     584             : 
     585           0 :             if ((i & 7) == 6) /* pixels i and i + 1 complete a group of eight */
     586           0 :                 output_pixel(*dest++, acc);
     587             :         }
     588           0 :         c->dither_error[0][i] = err; /* keep the value left over from the last pixel */
     589             :     } else {
     590       67200 :     for (i = 0; i < dstW; i += 8) { /* one output byte per iteration */
     591       57600 :         int acc = 0;
     592       57600 :         accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
     593       57600 :         accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
     594       57600 :         accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
     595       57600 :         accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
     596       57600 :         accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
     597       57600 :         accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
     598       57600 :         accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
     599       57600 :         accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
     600             : 
     601       57600 :         output_pixel(*dest++, acc);
     602             :     }
     603             :     }
     604        9600 : }
     605             : 
     606             : #undef output_pixel
     607             : #undef accumulate_bit
     608             : /*
 * YUV2PACKEDWRAPPER(name, base, ext, fmt) defines three static functions:
 *   <name><ext>_X_c, <name><ext>_2_c and <name><ext>_1_c.
 * Each one forwards all of its arguments, unchanged and in order, to the
 * corresponding <name><base>_X_c_template / _2_c_template / _1_c_template
 * and appends fmt as the final argument.
 */
     609             : #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
     610             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     611             :                                 const int16_t **lumSrc, int lumFilterSize, \
     612             :                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
     613             :                                 const int16_t **chrVSrc, int chrFilterSize, \
     614             :                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
     615             :                                 int y) \
     616             : { \
     617             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
     618             :                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
     619             :                                   alpSrc, dest, dstW, y, fmt); \
     620             : } \
     621             :  \
     622             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
     623             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
     624             :                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
     625             :                                 int yalpha, int uvalpha, int y) \
     626             : { \
     627             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
     628             :                                   dest, dstW, yalpha, uvalpha, y, fmt); \
     629             : } \
     630             :  \
     631             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
     632             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
     633             :                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
     634             :                                 int uvalpha, int y) \
     635             : { \
     636             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
     637             :                                   abuf0, dest, dstW, uvalpha, \
     638             :                                   y, fmt); \
     639             : }
     640             : /* base is empty here, so these expand to yuv2monowhite_{X,2,1}_c and yuv2monoblack_{X,2,1}_c, all calling yuv2mono_{X,2,1}_c_template. */
     641       73320 : YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE)
     642        7588 : YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)
     643             : 
     644             : #define output_pixels(pos, Y1, U, Y2, V) \
     645             :     if (target == AV_PIX_FMT_YUYV422) { \
     646             :         dest[pos + 0] = Y1; \
     647             :         dest[pos + 1] = U;  \
     648             :         dest[pos + 2] = Y2; \
     649             :         dest[pos + 3] = V;  \
     650             :     } else if (target == AV_PIX_FMT_YVYU422) { \
     651             :         dest[pos + 0] = Y1; \
     652             :         dest[pos + 1] = V;  \
     653             :         dest[pos + 2] = Y2; \
     654             :         dest[pos + 3] = U;  \
     655             :     } else { /* AV_PIX_FMT_UYVY422 */ \
     656             :         dest[pos + 0] = U;  \
     657             :         dest[pos + 1] = Y1; \
     658             :         dest[pos + 2] = V;  \
     659             :         dest[pos + 3] = Y2; \
     660             :     }
     661             : 
     662             : static av_always_inline void
     663       18732 : yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
     664             :                      const int16_t **lumSrc, int lumFilterSize,
     665             :                      const int16_t *chrFilter, const int16_t **chrUSrc,
     666             :                      const int16_t **chrVSrc, int chrFilterSize,
     667             :                      const int16_t **alpSrc, uint8_t *dest, int dstW,
     668             :                      int y, enum AVPixelFormat target)
     669             : {
     670             :     int i;
     671             : 
     672     3292764 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     673             :         int j;
     674     3274032 :         int Y1 = 1 << 18;
     675     3274032 :         int Y2 = 1 << 18;
     676     3274032 :         int U  = 1 << 18;
     677     3274032 :         int V  = 1 << 18;
     678             : 
     679     6878064 :         for (j = 0; j < lumFilterSize; j++) {
     680     3604032 :             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
     681     3604032 :             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
     682             :         }
     683    16610160 :         for (j = 0; j < chrFilterSize; j++) {
     684    13336128 :             U += chrUSrc[j][i] * chrFilter[j];
     685    13336128 :             V += chrVSrc[j][i] * chrFilter[j];
     686             :         }
     687     3274032 :         Y1 >>= 19;
     688     3274032 :         Y2 >>= 19;
     689     3274032 :         U  >>= 19;
     690     3274032 :         V  >>= 19;
     691     3274032 :         if ((Y1 | Y2 | U | V) & 0x100) {
     692          17 :             Y1 = av_clip_uint8(Y1);
     693          17 :             Y2 = av_clip_uint8(Y2);
     694          17 :             U  = av_clip_uint8(U);
     695          17 :             V  = av_clip_uint8(V);
     696             :         }
     697     3274032 :         output_pixels(4*i, Y1, U, Y2, V);
     698             :     }
     699       18732 : }
     700             : 
     701             : static av_always_inline void
     702           0 : yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
     703             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
     704             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
     705             :                      int yalpha, int uvalpha, int y,
     706             :                      enum AVPixelFormat target)
     707             : {
     708           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
     709           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
     710           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     711           0 :     int  yalpha1 = 4096 - yalpha;
     712           0 :     int uvalpha1 = 4096 - uvalpha;
     713             :     int i;
     714             :     av_assert2(yalpha  <= 4096U);
     715             :     av_assert2(uvalpha <= 4096U);
     716             : 
     717           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     718           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
     719           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
     720           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
     721           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
     722             : 
     723           0 :         if ((Y1 | Y2 | U | V) & 0x100) {
     724           0 :             Y1 = av_clip_uint8(Y1);
     725           0 :             Y2 = av_clip_uint8(Y2);
     726           0 :             U  = av_clip_uint8(U);
     727           0 :             V  = av_clip_uint8(V);
     728             :         }
     729             : 
     730           0 :         output_pixels(i * 4, Y1, U, Y2, V);
     731             :     }
     732           0 : }
     733             : 
     734             : static av_always_inline void
     735       86400 : yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
     736             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
     737             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
     738             :                      int uvalpha, int y, enum AVPixelFormat target)
     739             : {
     740       86400 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     741             :     int i;
     742             : 
     743       86400 :     if (uvalpha < 2048) {
     744    31190400 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     745    31104000 :             int Y1 = (buf0[i * 2    ]+64) >> 7;
     746    31104000 :             int Y2 = (buf0[i * 2 + 1]+64) >> 7;
     747    31104000 :             int U  = (ubuf0[i]       +64) >> 7;
     748    31104000 :             int V  = (vbuf0[i]       +64) >> 7;
     749             : 
     750    31104000 :             if ((Y1 | Y2 | U | V) & 0x100) {
     751           0 :                 Y1 = av_clip_uint8(Y1);
     752           0 :                 Y2 = av_clip_uint8(Y2);
     753           0 :                 U  = av_clip_uint8(U);
     754           0 :                 V  = av_clip_uint8(V);
     755             :             }
     756             : 
     757    31104000 :             Y1 = av_clip_uint8(Y1);
     758    31104000 :             Y2 = av_clip_uint8(Y2);
     759    31104000 :             U  = av_clip_uint8(U);
     760    31104000 :             V  = av_clip_uint8(V);
     761             : 
     762    31104000 :             output_pixels(i * 4, Y1, U, Y2, V);
     763             :         }
     764             :     } else {
     765           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
     766           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     767           0 :             int Y1 = (buf0[i * 2    ]    + 64) >> 7;
     768           0 :             int Y2 = (buf0[i * 2 + 1]    + 64) >> 7;
     769           0 :             int U  = (ubuf0[i] + ubuf1[i]+128) >> 8;
     770           0 :             int V  = (vbuf0[i] + vbuf1[i]+128) >> 8;
     771             : 
     772           0 :             if ((Y1 | Y2 | U | V) & 0x100) {
     773           0 :                 Y1 = av_clip_uint8(Y1);
     774           0 :                 Y2 = av_clip_uint8(Y2);
     775           0 :                 U  = av_clip_uint8(U);
     776           0 :                 V  = av_clip_uint8(V);
     777             :             }
     778             : 
     779           0 :             Y1 = av_clip_uint8(Y1);
     780           0 :             Y2 = av_clip_uint8(Y2);
     781           0 :             U  = av_clip_uint8(U);
     782           0 :             V  = av_clip_uint8(V);
     783             : 
     784           0 :             output_pixels(i * 4, Y1, U, Y2, V);
     785             :         }
     786             :     }
     787       86400 : }
     788             : 
     789             : #undef output_pixels
     790             : /* Expand to yuv2yuyv422_{X,2,1}_c, yuv2yvyu422_{X,2,1}_c and yuv2uyvy422_{X,2,1}_c, each calling yuv2422_{X,2,1}_c_template with its pixel format. */
     791       11620 : YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
     792        3556 : YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
     793       89956 : YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
     794             : /*
 * Helpers for the 16-bit-per-component RGB writers that follow.
 *
 * R_B expands to the local variable R when target is AV_PIX_FMT_RGB48LE,
 * AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64LE or AV_PIX_FMT_RGBA64BE, and to the
 * local variable B for every other target; B_R makes the opposite choice.
 * Both require R, B and target to be in scope where they are expanded.
 *
 * output_pixel(pos, val) stores val at pos with AV_WB16 when isBE(target) is
 * true and with AV_WL16 otherwise.
 *
 * NOTE(review): output_pixel expands to a bare if/else block, so it must not
 * be used as the unbraced body of an outer if that has its own else.
 */
     795             : #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
     796             : #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
     797             : #define output_pixel(pos, val) \
     798             :     if (isBE(target)) { \
     799             :         AV_WB16(pos, val); \
     800             :     } else { \
     801             :         AV_WL16(pos, val); \
     802             :     }
     803             : /**
 * Vertically filter lumFilterSize luma lines and chrFilterSize chroma lines,
 * convert the result to RGB with the c->yuv2rgb_* coefficients and store it
 * as 16-bit components through output_pixel().
 *
 * Each loop iteration produces two horizontally adjacent pixels that share
 * one U/V pair. With eightbytes set, four components are stored per pixel
 * (the fourth is A1/A2) and dest advances by 8 elements per pair; otherwise
 * three components are stored per pixel and dest advances by 6.
 *
 * When hasAlpha is set, alpha is filtered from alpSrc with the luma filter
 * coefficients; otherwise A1/A2 keep their initial value 0xffff << 14.
 *
 * The accumulators are int, but every product is formed in unsigned
 * arithmetic (see the (unsigned) casts on the filter coefficients).
 * NOTE(review): presumably this keeps intermediate wraparound well defined --
 * confirm before changing any operand type or the order of the shifts.
 *
 * y is not read by this function.
 */
     804             : static av_always_inline void
     805      107684 : yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
     806             :                        const int32_t **lumSrc, int lumFilterSize,
     807             :                        const int16_t *chrFilter, const int32_t **chrUSrc,
     808             :                        const int32_t **chrVSrc, int chrFilterSize,
     809             :                        const int32_t **alpSrc, uint16_t *dest, int dstW,
     810             :                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
     811             : {
     812             :     int i;
     813      107684 :     int A1 = 0xffff<<14, A2 = 0xffff<<14; /* alpha used when hasAlpha is 0 */
     814             : 
     815    18789768 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     816             :         int j;
     817    18682084 :         int Y1 = -0x40000000;
     818    18682084 :         int Y2 = -0x40000000;
     819    18682084 :         int U  = -(128 << 23); // 19
     820    18682084 :         int V  = -(128 << 23);
     821             :         int R, G, B;
     822             : 
     823    37364168 :         for (j = 0; j < lumFilterSize; j++) {
     824    18682084 :             Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
     825    18682084 :             Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
     826             :         }
     827    93410420 :         for (j = 0; j < chrFilterSize; j++) {; /* NOTE(review): the ';' after '{' is an empty statement */
     828    74728336 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
     829    74728336 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
     830             :         }
     831             : 
     832    18682084 :         if (hasAlpha) {
     833           0 :             A1 = -0x40000000;
     834           0 :             A2 = -0x40000000;
     835           0 :             for (j = 0; j < lumFilterSize; j++) {
     836           0 :                 A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
     837           0 :                 A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
     838             :             }
     839           0 :             A1 >>= 1;
     840           0 :             A1 += 0x20002000;
     841           0 :             A2 >>= 1;
     842           0 :             A2 += 0x20002000;
     843             :         }
     844             : 
     845             :         // 8 bits: 12+15=27; 16 bits: 12+19=31
     846    18682084 :         Y1 >>= 14; // 10
     847    18682084 :         Y1 += 0x10000;
     848    18682084 :         Y2 >>= 14;
     849    18682084 :         Y2 += 0x10000;
     850    18682084 :         U  >>= 14;
     851    18682084 :         V  >>= 14;
     852             : 
     853             :         // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
     854    18682084 :         Y1 -= c->yuv2rgb_y_offset;
     855    18682084 :         Y2 -= c->yuv2rgb_y_offset;
     856    18682084 :         Y1 *= c->yuv2rgb_y_coeff;
     857    18682084 :         Y2 *= c->yuv2rgb_y_coeff;
     858    18682084 :         Y1 += 1 << 13; // 21
     859    18682084 :         Y2 += 1 << 13;
     860             :         // 8 bits: 17 + 13 bits = 30 bits, 16 bits: 17 + 13 bits = 30 bits
     861             : 
     862    18682084 :         R = V * c->yuv2rgb_v2r_coeff;
     863    18682084 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     864    18682084 :         B =                            U * c->yuv2rgb_u2b_coeff;
     865             : 
     866             :         // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
     867    18682084 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     868    18682084 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     869    18682084 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     870    18682084 :         if (eightbytes) {
     871     3750912 :             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     872     3750912 :             output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     873     3750912 :             output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     874     3750912 :             output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     875     3750912 :             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     876     3750912 :             dest += 8; /* two pixels of four components each */
     877             :         } else {
     878    14931172 :             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
     879    14931172 :             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
     880    14931172 :             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
     881    14931172 :             dest += 6; /* two pixels of three components each */
     882             :         }
     883             :     }
     884      107684 : }
     885             : 
     886             : static av_always_inline void
     887           0 : yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
     888             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
     889             :                        const int32_t *abuf[2], uint16_t *dest, int dstW,
     890             :                        int yalpha, int uvalpha, int y,
     891             :                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
     892             : {
     893           0 :     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
     894           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
     895           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
     896           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
     897           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
     898           0 :     int  yalpha1 = 4096 - yalpha;
     899           0 :     int uvalpha1 = 4096 - uvalpha;
     900             :     int i;
     901           0 :     int A1 = 0xffff<<14, A2 = 0xffff<<14;
     902             : 
     903             :     av_assert2(yalpha  <= 4096U);
     904             :     av_assert2(uvalpha <= 4096U);
     905             : 
     906           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     907           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
     908           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
     909           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha - (128 << 23)) >> 14;
     910           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha - (128 << 23)) >> 14;
     911             :         int R, G, B;
     912             : 
     913           0 :         Y1 -= c->yuv2rgb_y_offset;
     914           0 :         Y2 -= c->yuv2rgb_y_offset;
     915           0 :         Y1 *= c->yuv2rgb_y_coeff;
     916           0 :         Y2 *= c->yuv2rgb_y_coeff;
     917           0 :         Y1 += 1 << 13;
     918           0 :         Y2 += 1 << 13;
     919             : 
     920           0 :         R = V * c->yuv2rgb_v2r_coeff;
     921           0 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     922           0 :         B =                            U * c->yuv2rgb_u2b_coeff;
     923             : 
     924           0 :         if (hasAlpha) {
     925           0 :             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 1;
     926           0 :             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;
     927             : 
     928           0 :             A1 += 1 << 13;
     929           0 :             A2 += 1 << 13;
     930             :         }
     931             : 
     932           0 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     933           0 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     934           0 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     935           0 :         if (eightbytes) {
     936           0 :             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     937           0 :             output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     938           0 :             output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     939           0 :             output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     940           0 :             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     941           0 :             dest += 8;
     942             :         } else {
     943           0 :             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
     944           0 :             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
     945           0 :             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
     946           0 :             dest += 6;
     947             :         }
     948             :     }
     949           0 : }
     950             : 
     951             : static av_always_inline void
     952       44900 : yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
     953             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
     954             :                        const int32_t *abuf0, uint16_t *dest, int dstW,
     955             :                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
     956             : {
     957       44900 :     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     958             :     int i;
     959       44900 :     int A1 = 0xffff<<14, A2= 0xffff<<14;
     960             : 
     961       44900 :     if (uvalpha < 2048) {
     962     7677000 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     963     7632100 :             int Y1 = (buf0[i * 2]    ) >> 2;
     964     7632100 :             int Y2 = (buf0[i * 2 + 1]) >> 2;
     965     7632100 :             int U  = (ubuf0[i] - (128 << 11)) >> 2;
     966     7632100 :             int V  = (vbuf0[i] - (128 << 11)) >> 2;
     967             :             int R, G, B;
     968             : 
     969     7632100 :             Y1 -= c->yuv2rgb_y_offset;
     970     7632100 :             Y2 -= c->yuv2rgb_y_offset;
     971     7632100 :             Y1 *= c->yuv2rgb_y_coeff;
     972     7632100 :             Y2 *= c->yuv2rgb_y_coeff;
     973     7632100 :             Y1 += 1 << 13;
     974     7632100 :             Y2 += 1 << 13;
     975             : 
     976     7632100 :             if (hasAlpha) {
     977           0 :                 A1 = abuf0[i * 2    ] << 11;
     978           0 :                 A2 = abuf0[i * 2 + 1] << 11;
     979             : 
     980           0 :                 A1 += 1 << 13;
     981           0 :                 A2 += 1 << 13;
     982             :             }
     983             : 
     984     7632100 :             R = V * c->yuv2rgb_v2r_coeff;
     985     7632100 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     986     7632100 :             B =                            U * c->yuv2rgb_u2b_coeff;
     987             : 
     988     7632100 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     989     7632100 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     990     7632100 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     991     7632100 :             if (eightbytes) {
     992           0 :                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     993           0 :                 output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     994           0 :                 output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     995           0 :                 output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     996           0 :                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     997           0 :                 dest += 8;
     998             :             } else {
     999     7632100 :                 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
    1000     7632100 :                 output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
    1001     7632100 :                 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
    1002     7632100 :                 dest += 6;
    1003             :             }
    1004             :         }
    1005             :     } else {
    1006           0 :         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1007           0 :         int A1 = 0xffff<<14, A2 = 0xffff<<14;
    1008           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1009           0 :             int Y1 = (buf0[i * 2]    ) >> 2;
    1010           0 :             int Y2 = (buf0[i * 2 + 1]) >> 2;
    1011           0 :             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
    1012           0 :             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
    1013             :             int R, G, B;
    1014             : 
    1015           0 :             Y1 -= c->yuv2rgb_y_offset;
    1016           0 :             Y2 -= c->yuv2rgb_y_offset;
    1017           0 :             Y1 *= c->yuv2rgb_y_coeff;
    1018           0 :             Y2 *= c->yuv2rgb_y_coeff;
    1019           0 :             Y1 += 1 << 13;
    1020           0 :             Y2 += 1 << 13;
    1021             : 
    1022           0 :             if (hasAlpha) {
    1023           0 :                 A1 = abuf0[i * 2    ] << 11;
    1024           0 :                 A2 = abuf0[i * 2 + 1] << 11;
    1025             : 
    1026           0 :                 A1 += 1 << 13;
    1027           0 :                 A2 += 1 << 13;
    1028             :             }
    1029             : 
    1030           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1031           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1032           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1033             : 
    1034           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
    1035           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
    1036           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
    1037           0 :             if (eightbytes) {
    1038           0 :                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
    1039           0 :                 output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
    1040           0 :                 output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
    1041           0 :                 output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
    1042           0 :                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
    1043           0 :                 dest += 8;
    1044             :             } else {
    1045           0 :                 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
    1046           0 :                 output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
    1047           0 :                 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
    1048           0 :                 dest += 6;
    1049             :             }
    1050             :         }
    1051             :     }
    1052       44900 : }
    1053             : /**
 * Per-pixel-chroma variant of the vertically filtered 16-bit RGB(A) writer:
 * every loop iteration produces one output pixel and reads its own
 * chrUSrc[j][i] / chrVSrc[j][i] samples.
 *
 * lumFilterSize luma lines and chrFilterSize chroma lines are summed with
 * their filter coefficients, converted to RGB with the c->yuv2rgb_*
 * coefficients and stored through output_pixel(). With eightbytes set a
 * fourth component (A) is stored and dest advances by 4 elements per pixel;
 * otherwise dest advances by 3.
 *
 * When hasAlpha is set, alpha is filtered from alpSrc with the luma filter
 * coefficients; otherwise A keeps its initial value 0xffff << 14.
 *
 * The accumulators are int, but every product is formed in unsigned
 * arithmetic (see the (unsigned) casts on the filter coefficients).
 * NOTE(review): presumably this keeps intermediate wraparound well defined --
 * confirm before changing any operand type or the order of the shifts.
 *
 * y is not read by this function.
 */
    1054             : static av_always_inline void
    1055        1000 : yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1056             :                        const int32_t **lumSrc, int lumFilterSize,
    1057             :                        const int16_t *chrFilter, const int32_t **chrUSrc,
    1058             :                        const int32_t **chrVSrc, int chrFilterSize,
    1059             :                        const int32_t **alpSrc, uint16_t *dest, int dstW,
    1060             :                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1061             : {
    1062             :     int i;
    1063        1000 :     int A = 0xffff<<14; /* alpha used when hasAlpha is 0 */
    1064             : 
    1065      201000 :     for (i = 0; i < dstW; i++) {
    1066             :         int j;
    1067      200000 :         int Y  = -0x40000000;
    1068      200000 :         int U  = -(128 << 23); // 19
    1069      200000 :         int V  = -(128 << 23);
    1070             :         int R, G, B;
    1071             : 
    1072     2600000 :         for (j = 0; j < lumFilterSize; j++) {
    1073     2400000 :             Y += lumSrc[j][i]  * (unsigned)lumFilter[j];
    1074             :         }
    1075     2600000 :         for (j = 0; j < chrFilterSize; j++) {; /* NOTE(review): the ';' after '{' is an empty statement */
    1076     2400000 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    1077     2400000 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    1078             :         }
    1079             : 
    1080      200000 :         if (hasAlpha) {
    1081       80000 :             A = -0x40000000;
    1082     1040000 :             for (j = 0; j < lumFilterSize; j++) {
    1083      960000 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    1084             :             }
    1085       80000 :             A >>= 1;
    1086       80000 :             A += 0x20002000;
    1087             :         }
    1088             : 
    1089             :         // 8bit: 12+15=27; 16-bit: 12+19=31
    1090      200000 :         Y  >>= 14; // 10
    1091      200000 :         Y += 0x10000;
    1092      200000 :         U  >>= 14;
    1093      200000 :         V  >>= 14;
    1094             : 
    1095             :         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
    1096      200000 :         Y -= c->yuv2rgb_y_offset;
    1097      200000 :         Y *= c->yuv2rgb_y_coeff;
    1098      200000 :         Y += 1 << 13; // 21
    1099             :         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
    1100             : 
    1101      200000 :         R = V * c->yuv2rgb_v2r_coeff;
    1102      200000 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1103      200000 :         B =                            U * c->yuv2rgb_u2b_coeff;
    1104             : 
    1105             :         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
    1106      200000 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1107      200000 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1108      200000 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1109      200000 :         if (eightbytes) {
    1110       80000 :             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1111       80000 :             dest += 4; /* one pixel of four components */
    1112             :         } else {
    1113      120000 :             dest += 3; /* one pixel of three components */
    1114             :         }
    1115             :     }
    1116        1000 : }
    1117             : /* Two-line variant: blends buf[0]/buf[1] (and the matching chroma/alpha lines) with weights (4096 - alpha) and alpha, then writes 3 or 4 uint16_t components per pixel. y is not referenced in this body. */
    1118             : static av_always_inline void
    1119           0 : yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
    1120             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
    1121             :                        const int32_t *abuf[2], uint16_t *dest, int dstW,
    1122             :                        int yalpha, int uvalpha, int y,
    1123             :                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1124             : {
    1125           0 :     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
    1126           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1127           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1128           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL, // abuf is only dereferenced when hasAlpha is set
    1129           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1130           0 :     int  yalpha1 = 4096 - yalpha; // weight of line 0; yalpha is the weight of line 1
    1131           0 :     int uvalpha1 = 4096 - uvalpha;
    1132             :     int i;
    1133           0 :     int A = 0xffff<<14; // default alpha, used unchanged when hasAlpha is 0
    1134             : 
    1135             :     av_assert2(yalpha  <= 4096U);
    1136             :     av_assert2(uvalpha <= 4096U);
    1137             : 
    1138           0 :     for (i = 0; i < dstW; i++) {
    1139           0 :         int Y  = (buf0[i]     * yalpha1  + buf1[i]     * yalpha) >> 14;
    1140           0 :         int U  = (ubuf0[i]   * uvalpha1 + ubuf1[i]     * uvalpha - (128 << 23)) >> 14;
    1141           0 :         int V  = (vbuf0[i]   * uvalpha1 + vbuf1[i]     * uvalpha - (128 << 23)) >> 14;
    1142             :         int R, G, B;
    1143             : 
    1144           0 :         Y -= c->yuv2rgb_y_offset;
    1145           0 :         Y *= c->yuv2rgb_y_coeff;
    1146           0 :         Y += 1 << 13; // rounding term for the >> 14 applied at output
    1147             : 
    1148           0 :         R = V * c->yuv2rgb_v2r_coeff;
    1149           0 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1150           0 :         B =                            U * c->yuv2rgb_u2b_coeff;
    1151             : 
    1152           0 :         if (hasAlpha) {
    1153           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;
    1154             : 
    1155           0 :             A += 1 << 13;
    1156             :         }
    1157             : 
    1158           0 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1159           0 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1160           0 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1161           0 :         if (eightbytes) {
    1162           0 :             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1163           0 :             dest += 4;
    1164             :         } else {
    1165           0 :             dest += 3;
    1166             :         }
    1167             :     }
    1168           0 : }
    1169             : /* Single-luma-line variant: for uvalpha < 2048 only ubuf[0]/vbuf[0] are read, otherwise the two chroma lines are averaged. Writes 3 or 4 uint16_t components per pixel. y is not referenced in this body. */
    1170             : static av_always_inline void
    1171           0 : yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
    1172             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
    1173             :                        const int32_t *abuf0, uint16_t *dest, int dstW,
    1174             :                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1175             : {
    1176           0 :     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1177             :     int i;
    1178           0 :     int A = 0xffff<<14; // default alpha, used unchanged when hasAlpha is 0
    1179             : 
    1180           0 :     if (uvalpha < 2048) {
    1181           0 :         for (i = 0; i < dstW; i++) {
    1182           0 :             int Y  = (buf0[i]) >> 2;
    1183           0 :             int U  = (ubuf0[i] - (128 << 11)) >> 2;
    1184           0 :             int V  = (vbuf0[i] - (128 << 11)) >> 2;
    1185             :             int R, G, B;
    1186             : 
    1187           0 :             Y -= c->yuv2rgb_y_offset;
    1188           0 :             Y *= c->yuv2rgb_y_coeff;
    1189           0 :             Y += 1 << 13;
    1190             : 
    1191           0 :             if (hasAlpha) {
    1192           0 :                 A = abuf0[i] << 11;
    1193             : 
    1194           0 :                 A += 1 << 13;
    1195             :             }
    1196             : 
    1197           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1198           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1199           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1200             : 
    1201           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1202           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1203           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1204           0 :             if (eightbytes) {
    1205           0 :                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1206           0 :                 dest += 4;
    1207             :             } else {
    1208           0 :                 dest += 3;
    1209             :             }
    1210             :         }
    1211             :     } else {
    1212           0 :         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1213           0 :         int A = 0xffff<<14; // NOTE(review): shadows the function-scope A (same initial value); the redeclaration looks redundant
    1214           0 :         for (i = 0; i < dstW; i++) {
    1215           0 :             int Y  = (buf0[i]    ) >> 2;
    1216           0 :             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3; // sum of two lines with doubled offset and one extra shift bit: their average
    1217           0 :             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
    1218             :             int R, G, B;
    1219             : 
    1220           0 :             Y -= c->yuv2rgb_y_offset;
    1221           0 :             Y *= c->yuv2rgb_y_coeff;
    1222           0 :             Y += 1 << 13;
    1223             : 
    1224           0 :             if (hasAlpha) {
    1225           0 :                 A = abuf0[i] << 11;
    1226             : 
    1227           0 :                 A += 1 << 13;
    1228             :             }
    1229             : 
    1230           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1231           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1232           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1233             : 
    1234           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1235           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1236           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1237           0 :             if (eightbytes) {
    1238           0 :                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1239           0 :                 dest += 4;
    1240             :             } else {
    1241           0 :                 dest += 3;
    1242             :             }
    1243             :         }
    1244             :     }
    1245           0 : }
    1246             : /* NOTE(review): the templates above use R_B / B_R, while the #undefs below name r_b / b_r; check against the #defines (outside this view) that the intended macros are being removed. */
    1247             : #undef output_pixel
    1248             : #undef r_b
    1249             : #undef b_r
    1250             : /* Generates the name##ext##_X_c / _2_c / _1_c entry points for one output format: each casts the int16_t-typed line pointers to int32_t and the uint8_t destination to uint16_t, then forwards to name##base##_{X,2,1}_c_template with fmt, hasAlpha and eightbytes as extra arguments. */
    1251             : #define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
    1252             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
    1253             :                         const int16_t **_lumSrc, int lumFilterSize, \
    1254             :                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
    1255             :                         const int16_t **_chrVSrc, int chrFilterSize, \
    1256             :                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
    1257             :                         int y) \
    1258             : { \
    1259             :     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
    1260             :                   **chrUSrc = (const int32_t **) _chrUSrc, \
    1261             :                   **chrVSrc = (const int32_t **) _chrVSrc, \
    1262             :                   **alpSrc  = (const int32_t **) _alpSrc; \
    1263             :     uint16_t *dest = (uint16_t *) _dest; \
    1264             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
    1265             :                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
    1266             :                           alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
    1267             : } \
    1268             :  \
    1269             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
    1270             :                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
    1271             :                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
    1272             :                         int yalpha, int uvalpha, int y) \
    1273             : { \
    1274             :     const int32_t **buf  = (const int32_t **) _buf, \
    1275             :                   **ubuf = (const int32_t **) _ubuf, \
    1276             :                   **vbuf = (const int32_t **) _vbuf, \
    1277             :                   **abuf = (const int32_t **) _abuf; \
    1278             :     uint16_t *dest = (uint16_t *) _dest; \
    1279             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
    1280             :                           dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
    1281             : } \
    1282             :  \
    1283             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
    1284             :                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
    1285             :                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
    1286             :                         int uvalpha, int y) \
    1287             : { \
    1288             :     const int32_t *buf0  = (const int32_t *)  _buf0, \
    1289             :                  **ubuf  = (const int32_t **) _ubuf, \
    1290             :                  **vbuf  = (const int32_t **) _vbuf, \
    1291             :                   *abuf0 = (const int32_t *)  _abuf0; \
    1292             :     uint16_t *dest = (uint16_t *) _dest; \
    1293             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
    1294             :                                   dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
    1295             : }
    1296             : /* Wrapper instances built on the rgba64 templates. Argument pairs are (hasAlpha, eightbytes): 48-bit formats pass (0, 0), *a64* pass (1, 1), and *x64* pass (0, 1), i.e. four components per pixel without reading a source alpha line. */
    1297       16128 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
    1298      106504 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
    1299        4320 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
    1300        4320 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
    1301           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
    1302           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
    1303        4320 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
    1304        8352 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
    1305           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
    1306           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
    1307        4320 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
    1308        4320 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
    1309             : /* The same twelve format variants, built on the rgba64_full templates (one chroma sample per output pixel). */
    1310         200 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
    1311         200 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
    1312         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
    1313         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
    1314         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
    1315         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
    1316           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
    1317           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
    1318         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
    1319         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
    1320           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
    1321           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
    1322             : 
    1323             : /*
    1324             :  * Write out 2 RGB pixels in the target pixel format. This function takes a
    1325             :  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
    1326             :  * things like endianness conversion and shifting. The caller takes care of
    1327             :  * setting the correct offset in these tables from the chroma (U/V) values.
    1328             :  * This function then uses the luminance (Y1/Y2) values to write out the
    1329             :  * correct RGB values into the destination buffer.
    1330             :  */
    1331             : static av_always_inline void
    1332   211520788 : yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
    1333             :               unsigned A1, unsigned A2,
    1334             :               const void *_r, const void *_g, const void *_b, int y,
    1335             :               enum AVPixelFormat target, int hasAlpha)
    1336             : {
    1337   211520788 :     if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
    1338   235908828 :         target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) { // 32-bit packed: tables and destination are read/written as uint32_t
    1339    28398728 :         uint32_t *dest = (uint32_t *) _dest;
    1340    28398728 :         const uint32_t *r = (const uint32_t *) _r;
    1341    28398728 :         const uint32_t *g = (const uint32_t *) _g;
    1342    28398728 :         const uint32_t *b = (const uint32_t *) _b;
    1343             : 
    1344             : #if CONFIG_SMALL
    1345             :         int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0;
    1346             : 
    1347             :         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
    1348             :         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
    1349             : #else
    1350    28398728 :         if (hasAlpha) {
    1351           0 :             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24; // alpha is added at bits 0-7 for the *32_1 formats, at bits 24-31 otherwise
    1352             : 
    1353             :             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0); // the table sum must leave the alpha byte clear
    1354           0 :             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
    1355           0 :             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
    1356             :         } else {
    1357             : #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
    1358             :             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
    1359             : 
    1360             :             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF); // without alpha the table sum is expected to already carry 0xFF in that byte
    1361             : #endif
    1362    28398728 :             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
    1363    28398728 :             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    1364             :         }
    1365             : #endif
    1366   318542764 :     } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) { // 24-bit packed: three bytes per pixel, two pixels (six bytes) per call
    1367   135420704 :         uint8_t *dest = (uint8_t *) _dest;
    1368   135420704 :         const uint8_t *r = (const uint8_t *) _r;
    1369   135420704 :         const uint8_t *g = (const uint8_t *) _g;
    1370   135420704 :         const uint8_t *b = (const uint8_t *) _b;
    1371             : 
    1372             : #define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
    1373             : #define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)
    1374             : 
    1375   135420704 :         dest[i * 6 + 0] = r_b[Y1];
    1376   135420704 :         dest[i * 6 + 1] =   g[Y1];
    1377   135420704 :         dest[i * 6 + 2] = b_r[Y1];
    1378   135420704 :         dest[i * 6 + 3] = r_b[Y2];
    1379   135420704 :         dest[i * 6 + 4] =   g[Y2];
    1380   135420704 :         dest[i * 6 + 5] = b_r[Y2];
    1381             : #undef r_b
    1382             : #undef b_r
    1383    47701356 :     } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
    1384     9113152 :                target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
    1385    47701356 :                target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) { // 16-bit packed: uint16_t tables, dither offsets chosen by row y
    1386    41669484 :         uint16_t *dest = (uint16_t *) _dest;
    1387    41669484 :         const uint16_t *r = (const uint16_t *) _r;
    1388    41669484 :         const uint16_t *g = (const uint16_t *) _g;
    1389    41669484 :         const uint16_t *b = (const uint16_t *) _b;
    1390             :         int dr1, dg1, db1, dr2, dg2, db2; // per-component dither offsets for pixel 1 and pixel 2
    1391             : 
    1392    41669484 :         if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
    1393    11676452 :             dr1 = ff_dither_2x2_8[ y & 1     ][0];
    1394    11676452 :             dg1 = ff_dither_2x2_4[ y & 1     ][0];
    1395    11676452 :             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
    1396    11676452 :             dr2 = ff_dither_2x2_8[ y & 1     ][1];
    1397    11676452 :             dg2 = ff_dither_2x2_4[ y & 1     ][1];
    1398    11676452 :             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
    1399    29993032 :         } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
    1400    26911752 :             dr1 = ff_dither_2x2_8[ y & 1     ][0];
    1401    26911752 :             dg1 = ff_dither_2x2_8[ y & 1     ][1];
    1402    26911752 :             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
    1403    26911752 :             dr2 = ff_dither_2x2_8[ y & 1     ][1];
    1404    26911752 :             dg2 = ff_dither_2x2_8[ y & 1     ][0];
    1405    26911752 :             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
    1406             :         } else {
    1407     3081280 :             dr1 = ff_dither_4x4_16[ y & 3     ][0];
    1408     3081280 :             dg1 = ff_dither_4x4_16[ y & 3     ][1];
    1409     3081280 :             db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
    1410     3081280 :             dr2 = ff_dither_4x4_16[ y & 3     ][1];
    1411     3081280 :             dg2 = ff_dither_4x4_16[ y & 3     ][0];
    1412     3081280 :             db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
    1413             :         }
    1414             : 
    1415    41669484 :         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; // dither is applied by offsetting the luma index into each table
    1416    41669484 :         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    1417             :     } else /* 8/4 bits */ {
    1418     6031872 :         uint8_t *dest = (uint8_t *) _dest;
    1419     6031872 :         const uint8_t *r = (const uint8_t *) _r;
    1420     6031872 :         const uint8_t *g = (const uint8_t *) _g;
    1421     6031872 :         const uint8_t *b = (const uint8_t *) _b;
    1422             :         int dr1, dg1, db1, dr2, dg2, db2;
    1423             : 
    1424     9884160 :         if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
    1425     3852288 :             const uint8_t * const d64 = ff_dither_8x8_73[y & 7]; // dither row picked by y, column by output x = i * 2 (+ 1)
    1426     3852288 :             const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
    1427     3852288 :             dr1 = dg1 = d32[(i * 2 + 0) & 7];
    1428     3852288 :             db1 =       d64[(i * 2 + 0) & 7];
    1429     3852288 :             dr2 = dg2 = d32[(i * 2 + 1) & 7];
    1430     3852288 :             db2 =       d64[(i * 2 + 1) & 7];
    1431             :         } else {
    1432     2179584 :             const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
    1433     2179584 :             const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
    1434     2179584 :             dr1 = db1 = d128[(i * 2 + 0) & 7];
    1435     2179584 :             dg1 =        d64[(i * 2 + 0) & 7];
    1436     2179584 :             dr2 = db2 = d128[(i * 2 + 1) & 7];
    1437     2179584 :             dg2 =        d64[(i * 2 + 1) & 7];
    1438             :         }
    1439             : 
    1440     6031872 :         if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) { // two pixels share one byte: pixel 1 in the low nibble, pixel 2 shifted into the high nibble
    1441           0 :             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
    1442           0 :                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
    1443             :         } else {
    1444     6031872 :             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
    1445     6031872 :             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    1446             :         }
    1447             :     }
    1448   211520788 : }
    1449             : /* Multi-tap vertical filter for the table-driven RGB outputs: each iteration filters two adjacent luma samples and one shared U/V sample, looks up the table pointers from U/V and lets yuv2rgb_write() emit both pixels. */
    1450             : static av_always_inline void
    1451     1128008 : yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1452             :                      const int16_t **lumSrc, int lumFilterSize,
    1453             :                      const int16_t *chrFilter, const int16_t **chrUSrc,
    1454             :                      const int16_t **chrVSrc, int chrFilterSize,
    1455             :                      const int16_t **alpSrc, uint8_t *dest, int dstW,
    1456             :                      int y, enum AVPixelFormat target, int hasAlpha)
    1457             : {
    1458             :     int i;
    1459             : 
    1460   189752496 :     for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pixel pair; the count rounds up for odd dstW
    1461             :         int j, A1, A2;
    1462   188624488 :         int Y1 = 1 << 18; // half of the 1 << 19 divisor applied below, i.e. a rounding term
    1463   188624488 :         int Y2 = 1 << 18;
    1464   188624488 :         int U  = 1 << 18;
    1465   188624488 :         int V  = 1 << 18;
    1466             :         const void *r, *g, *b;
    1467             : 
    1468   390160976 :         for (j = 0; j < lumFilterSize; j++) {
    1469   201536488 :             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
    1470   201536488 :             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
    1471             :         }
    1472   946559240 :         for (j = 0; j < chrFilterSize; j++) {
    1473   757934752 :             U += chrUSrc[j][i] * chrFilter[j];
    1474   757934752 :             V += chrVSrc[j][i] * chrFilter[j];
    1475             :         }
    1476   188624488 :         Y1 >>= 19;
    1477   188624488 :         Y2 >>= 19;
    1478   188624488 :         U  >>= 19;
    1479   188624488 :         V  >>= 19;
    1480   188624488 :         if (hasAlpha) {
    1481           0 :             A1 = 1 << 18;
    1482           0 :             A2 = 1 << 18;
    1483           0 :             for (j = 0; j < lumFilterSize; j++) {
    1484           0 :                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
    1485           0 :                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
    1486             :             }
    1487           0 :             A1 >>= 19;
    1488           0 :             A2 >>= 19;
    1489           0 :             if ((A1 | A2) & 0x100) { // cheap range test: clip only when bit 8 is set in either value
    1490           0 :                 A1 = av_clip_uint8(A1);
    1491           0 :                 A2 = av_clip_uint8(A2);
    1492             :             }
    1493             :         }
    1494             : 
    1495   188624488 :         r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM]; // chroma picks the table pointers; yuv2rgb_write() indexes them with Y1/Y2
    1496   188624488 :         g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
    1497   188624488 :         b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1498             : 
    1499   188624488 :         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, // A1/A2 are only assigned when hasAlpha is set, hence the guards
    1500             :                       r, g, b, y, target, hasAlpha);
    1501             :     }
    1502     1128008 : }
    1503             : /* Two-line variant for the table-driven RGB outputs: blends line 0 and line 1 with weights (4096 - alpha) and alpha, then emits one pixel pair per iteration through yuv2rgb_write(). */
    1504             : static av_always_inline void
    1505           0 : yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
    1506             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1507             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
    1508             :                      int yalpha, int uvalpha, int y,
    1509             :                      enum AVPixelFormat target, int hasAlpha)
    1510             : {
    1511           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    1512           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1513           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1514           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL, // abuf is only dereferenced when hasAlpha is set
    1515           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1516           0 :     int  yalpha1 = 4096 - yalpha; // weight of line 0; yalpha is the weight of line 1
    1517           0 :     int uvalpha1 = 4096 - uvalpha;
    1518             :     int i;
    1519             :     av_assert2(yalpha  <= 4096U);
    1520             :     av_assert2(uvalpha <= 4096U);
    1521             : 
    1522           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pixel pair; the count rounds up for odd dstW
    1523           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
    1524           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
    1525           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
    1526           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
    1527             :         int A1, A2;
    1528           0 :         const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1529           0 :                    *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1530           0 :                    *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1531             : 
    1532           0 :         if (hasAlpha) {
    1533           0 :             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
    1534           0 :             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
    1535           0 :             A1 = av_clip_uint8(A1);
    1536           0 :             A2 = av_clip_uint8(A2);
    1537             :         }
    1538             : 
    1539           0 :         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, // A1/A2 are only assigned when hasAlpha is set, hence the guards
    1540             :                       r, g, b, y, target, hasAlpha);
    1541             :     }
    1542           0 : }
    1543             : /* Single-luma-line variant for the table-driven RGB outputs: for uvalpha < 2048 only ubuf[0]/vbuf[0] are read, otherwise the two chroma lines are averaged. Emits one pixel pair per iteration through yuv2rgb_write(). */
    1544             : static av_always_inline void
    1545      134700 : yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
    1546             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1547             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
    1548             :                      int uvalpha, int y, enum AVPixelFormat target,
    1549             :                      int hasAlpha)
    1550             : {
    1551      134700 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1552             :     int i;
    1553             : 
    1554      134700 :     if (uvalpha < 2048) {
    1555    23031000 :         for (i = 0; i < ((dstW + 1) >> 1); i++) { // one iteration per pixel pair; the count rounds up for odd dstW
    1556    22896300 :             int Y1 = (buf0[i * 2    ] + 64) >> 7; // + 64 rounds the >> 7
    1557    22896300 :             int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
    1558    22896300 :             int U  = (ubuf0[i]        + 64) >> 7;
    1559    22896300 :             int V  = (vbuf0[i]        + 64) >> 7;
    1560             :             int A1, A2;
    1561    22896300 :             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1562    22896300 :                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1563    22896300 :                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1564             : 
    1565    22896300 :             if (hasAlpha) {
    1566           0 :                 A1 = abuf0[i * 2    ] * 255 + 16384 >> 15; // '+' binds tighter than '>>': this is (x * 255 + 16384) >> 15
    1567           0 :                 A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
    1568           0 :                 A1 = av_clip_uint8(A1);
    1569           0 :                 A2 = av_clip_uint8(A2);
    1570             :             }
    1571             : 
    1572    22896300 :             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1573             :                           r, g, b, y, target, hasAlpha);
    1574             :         }
    1575             :     } else {
    1576           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1577           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1578           0 :             int Y1 = (buf0[i * 2    ]     +  64) >> 7;
    1579           0 :             int Y2 = (buf0[i * 2 + 1]     +  64) >> 7;
    1580           0 :             int U  = (ubuf0[i] + ubuf1[i] + 128) >> 8; // sum of two lines with one extra shift bit: their rounded average
    1581           0 :             int V  = (vbuf0[i] + vbuf1[i] + 128) >> 8;
    1582             :             int A1, A2;
    1583           0 :             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1584           0 :                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1585           0 :                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1586             : 
    1587           0 :             if (hasAlpha) {
    1588           0 :                 A1 = (abuf0[i * 2    ] + 64) >> 7; // NOTE(review): the uvalpha < 2048 branch scales alpha as (x * 255 + 16384) >> 15 instead; confirm the difference is intended
    1589           0 :                 A2 = (abuf0[i * 2 + 1] + 64) >> 7;
    1590           0 :                 A1 = av_clip_uint8(A1);
    1591           0 :                 A2 = av_clip_uint8(A2);
    1592             :             }
    1593             : 
    1594           0 :             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1595             :                           r, g, b, y, target, hasAlpha);
    1596             :         }
    1597             :     }
    1598      134700 : }
    1599             : 
                       /*
                        * Define the static function name##ext##_X_c(), which forwards all of its
                        * arguments to name##base##_X_c_template() and appends `fmt` and
                        * `hasAlpha` as the two trailing arguments.
                        */
    1600             : #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
    1601             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
    1602             :                                 const int16_t **lumSrc, int lumFilterSize, \
    1603             :                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
    1604             :                                 const int16_t **chrVSrc, int chrFilterSize, \
    1605             :                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
    1606             :                                 int y) \
    1607             : { \
    1608             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
    1609             :                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
    1610             :                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
    1611             : }
    1612             : 
                       /*
                        * Expand YUV2RGBWRAPPERX() and additionally define name##ext##_2_c(),
                        * which forwards to name##base##_2_c_template() with `fmt` and `hasAlpha`
                        * appended as the two trailing arguments.
                        */
    1613             : #define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
    1614             : YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
    1615             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
    1616             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
    1617             :                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
    1618             :                                 int yalpha, int uvalpha, int y) \
    1619             : { \
    1620             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
    1621             :                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
    1622             : }
    1623             : 
                       /*
                        * Expand YUV2RGBWRAPPERX2() and additionally define name##ext##_1_c(),
                        * which forwards to name##base##_1_c_template() with `fmt` and `hasAlpha`
                        * appended as the two trailing arguments. One use of this macro therefore
                        * produces the _X_c, _2_c and _1_c wrappers for a given output format.
                        */
    1624             : #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
    1625             : YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
    1626             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
    1627             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
    1628             :                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
    1629             :                                 int uvalpha, int y) \
    1630             : { \
    1631             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
    1632             :                                   dstW, uvalpha, y, fmt, hasAlpha); \
    1633             : }
    1634             : 
                       /*
                        * Instantiate the _X_c/_2_c/_1_c wrappers on top of the yuv2rgb_*_c_template
                        * functions, one set per packed RGB output format.
                        *
                        * With CONFIG_SMALL a single 32-bit variant per format is generated and
                        * hasAlpha is the run-time expression `CONFIG_SWSCALE_ALPHA && c->needAlpha`.
                        * Otherwise separate "a32" (hasAlpha == 1, only if CONFIG_SWSCALE_ALPHA) and
                        * "x32" (hasAlpha == 0) variants are generated with a constant hasAlpha.
                        */
    1635             : #if CONFIG_SMALL
    1636             : YUV2RGBWRAPPER(yuv2rgb,,  32_1,  AV_PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1637             : YUV2RGBWRAPPER(yuv2rgb,,  32,    AV_PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1638             : #else
    1639             : #if CONFIG_SWSCALE_ALPHA
    1640           0 : YUV2RGBWRAPPER(yuv2rgb,, a32_1,  AV_PIX_FMT_RGB32_1,   1)
    1641           0 : YUV2RGBWRAPPER(yuv2rgb,, a32,    AV_PIX_FMT_RGB32,     1)
    1642             : #endif
    1643       25488 : YUV2RGBWRAPPER(yuv2rgb,, x32_1,  AV_PIX_FMT_RGB32_1,   0)
    1644      141640 : YUV2RGBWRAPPER(yuv2rgb,, x32,    AV_PIX_FMT_RGB32,     0)
    1645             : #endif
                       /* The remaining formats never carry alpha, so hasAlpha is the constant 0. */
    1646      667444 : YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24,   0)
    1647      151980 : YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24,   0)
    1648       68052 : YUV2RGBWRAPPER(yuv2rgb,,  16,    AV_PIX_FMT_RGB565,    0)
    1649      156152 : YUV2RGBWRAPPER(yuv2rgb,,  15,    AV_PIX_FMT_RGB555,    0)
    1650       17680 : YUV2RGBWRAPPER(yuv2rgb,,  12,    AV_PIX_FMT_RGB444,    0)
    1651       21888 : YUV2RGBWRAPPER(yuv2rgb,,   8,    AV_PIX_FMT_RGB8,      0)
    1652           0 : YUV2RGBWRAPPER(yuv2rgb,,   4,    AV_PIX_FMT_RGB4,      0)
    1653       12384 : YUV2RGBWRAPPER(yuv2rgb,,   4b,   AV_PIX_FMT_RGB4_BYTE, 0)
    1654             : 
                       /*
                        * Convert a single pixel from Y/U/V to R/G/B with the coefficients stored in
                        * the context and write it to dest in the byte layout selected by target.
                        *
                        * c        context; supplies yuv2rgb_y_offset, the yuv2rgb_*_coeff values,
                        *          the dither mode and the dither_error[][] rows
                        * dest     address of the output pixel (1, 3 or 4 bytes are written,
                        *          depending on target)
                        * i        horizontal pixel index; used to index dither_error[][] and as
                        *          the first argument of A_DITHER()/X_DITHER()
                        * Y,U,V    input sample values in the fixed-point scale produced by the
                        *          callers
                        * A        alpha byte, stored only when hasAlpha is non-zero
                        * y        row index; used as the second argument of A_DITHER()/X_DITHER()
                        * target   output pixel format; formats not listed in the switch write
                        *          nothing
                        * hasAlpha when zero, 255 is stored in the alpha byte of 32-bit formats
                        * err      running quantisation error for components 0..2; read and updated
                        *          only by the default/AUTO/ED dither path
                        */
    1655   110778895 : static av_always_inline void yuv2rgb_write_full(SwsContext *c,
    1656             :     uint8_t *dest, int i, int Y, int A, int U, int V,
    1657             :     int y, enum AVPixelFormat target, int hasAlpha, int err[4])
    1658             : {
    1659             :     int R, G, B;
    1660   110778895 :     int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;
    1661             : 
    1662   110778895 :     Y -= c->yuv2rgb_y_offset;
    1663   110778895 :     Y *= c->yuv2rgb_y_coeff;
                           /* 1 << 21 is the rounding term for the >> 22 applied below. */
    1664   110778895 :     Y += 1 << 21;
    1665   110778895 :     R = Y + V*c->yuv2rgb_v2r_coeff;
    1666   110778895 :     G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
    1667   110778895 :     B = Y +                          U*c->yuv2rgb_u2b_coeff;
                           /* Clip to 30 bits only if some component is negative or >= 1 << 30. */
    1668   110778895 :     if ((R | G | B) & 0xC0000000) {
    1669    10039614 :         R = av_clip_uintp2(R, 30);
    1670    10039614 :         G = av_clip_uintp2(G, 30);
    1671    10039614 :         B = av_clip_uintp2(B, 30);
    1672             :     }
    1673             : 
                           /* 30-bit components >> 22 give 8-bit output values. */
    1674   110778895 :     switch(target) {
    1675       40000 :     case AV_PIX_FMT_ARGB:
    1676       40000 :         dest[0] = hasAlpha ? A : 255;
    1677       40000 :         dest[1] = R >> 22;
    1678       40000 :         dest[2] = G >> 22;
    1679       40000 :         dest[3] = B >> 22;
    1680       40000 :         break;
    1681    15611124 :     case AV_PIX_FMT_RGB24:
    1682    15611124 :         dest[0] = R >> 22;
    1683    15611124 :         dest[1] = G >> 22;
    1684    15611124 :         dest[2] = B >> 22;
    1685    15611124 :         break;
    1686       40000 :     case AV_PIX_FMT_RGBA:
    1687       40000 :         dest[0] = R >> 22;
    1688       40000 :         dest[1] = G >> 22;
    1689       40000 :         dest[2] = B >> 22;
    1690       40000 :         dest[3] = hasAlpha ? A : 255;
    1691       40000 :         break;
    1692       40000 :     case AV_PIX_FMT_ABGR:
    1693       40000 :         dest[0] = hasAlpha ? A : 255;
    1694       40000 :         dest[1] = B >> 22;
    1695       40000 :         dest[2] = G >> 22;
    1696       40000 :         dest[3] = R >> 22;
    1697       40000 :         break;
    1698    30548400 :     case AV_PIX_FMT_BGR24:
    1699    30548400 :         dest[0] = B >> 22;
    1700    30548400 :         dest[1] = G >> 22;
    1701    30548400 :         dest[2] = R >> 22;
    1702    30548400 :         break;
    1703    23667386 :     case AV_PIX_FMT_BGRA:
    1704    23667386 :         dest[0] = B >> 22;
    1705    23667386 :         dest[1] = G >> 22;
    1706    23667386 :         dest[2] = R >> 22;
    1707    23667386 :         dest[3] = hasAlpha ? A : 255;
    1708    23667386 :         break;
    1709    40831985 :     case AV_PIX_FMT_BGR4_BYTE:
    1710             :     case AV_PIX_FMT_RGB4_BYTE:
    1711             :     case AV_PIX_FMT_BGR8:
    1712             :     case AV_PIX_FMT_RGB8:
    1713             :     {
                               /* Quantised components; their ranges depend on isrgb8 (see clips). */
    1714             :         int r,g,b;
    1715             : 
    1716    40831985 :         switch (c->dither) {
    1717    40831985 :         default:
    1718             :         case SWS_DITHER_AUTO:
    1719             :         case SWS_DITHER_ED:
                                   /*
                                    * Add a weighted sum (weights 7, 1, 5, 3; sum 16, hence >> 4) of
                                    * the running error in err[] and three entries of the stored
                                    * dither_error[] row, then store the running error at index i
                                    * and compute the new error of this pixel into err[].
                                    */
    1720    40831985 :             R >>= 22;
    1721    40831985 :             G >>= 22;
    1722    40831985 :             B >>= 22;
    1723    40831985 :             R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
    1724    40831985 :             G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
    1725    40831985 :             B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
    1726    40831985 :             c->dither_error[0][i] = err[0];
    1727    40831985 :             c->dither_error[1][i] = err[1];
    1728    40831985 :             c->dither_error[2][i] = err[2];
    1729    40831985 :             r = R >> (isrgb8 ? 5 : 7);
    1730    40831985 :             g = G >> (isrgb8 ? 5 : 6);
    1731    40831985 :             b = B >> (isrgb8 ? 6 : 7);
    1732    40831985 :             r = av_clip(r, 0, isrgb8 ? 7 : 1);
    1733    40831985 :             g = av_clip(g, 0, isrgb8 ? 7 : 3);
    1734    40831985 :             b = av_clip(b, 0, isrgb8 ? 3 : 1);
                                   /* Error = dithered value minus the quantised level scaled back. */
    1735    40831985 :             err[0] = R - r*(isrgb8 ? 36 : 255);
    1736    40831985 :             err[1] = G - g*(isrgb8 ? 36 : 85);
    1737    40831985 :             err[2] = B - b*(isrgb8 ? 85 : 255);
    1738    40831985 :             break;
    1739           0 :         case SWS_DITHER_A_DITHER:
    1740           0 :             if (isrgb8) {
    1741             :   /* see http://pippin.gimp.org/a_dither/ for details/origin */
    1742             : #define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
    1743           0 :                 r = (((R >> 19) + A_DITHER(i,y)  -96)>>8);
    1744           0 :                 g = (((G >> 19) + A_DITHER(i + 17,y) - 96)>>8);
    1745           0 :                 b = (((B >> 20) + A_DITHER(i + 17*2,y) -96)>>8);
    1746           0 :                 r = av_clip_uintp2(r, 3);
    1747           0 :                 g = av_clip_uintp2(g, 3);
    1748           0 :                 b = av_clip_uintp2(b, 2);
    1749             :             } else {
    1750           0 :                 r = (((R >> 21) + A_DITHER(i,y)-256)>>8);
    1751           0 :                 g = (((G >> 19) + A_DITHER(i + 17,y)-256)>>8);
    1752           0 :                 b = (((B >> 21) + A_DITHER(i + 17*2,y)-256)>>8);
    1753           0 :                 r = av_clip_uintp2(r, 1);
    1754           0 :                 g = av_clip_uintp2(g, 2);
    1755           0 :                 b = av_clip_uintp2(b, 1);
    1756             :             }
    1757           0 :             break;
    1758           0 :         case SWS_DITHER_X_DITHER:
    1759           0 :             if (isrgb8) {
    1760             :   /* see http://pippin.gimp.org/a_dither/ for details/origin */
    1761             : #define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
    1762           0 :                 r = (((R >> 19) + X_DITHER(i,y) - 96)>>8);
    1763           0 :                 g = (((G >> 19) + X_DITHER(i + 17,y) - 96)>>8);
    1764           0 :                 b = (((B >> 20) + X_DITHER(i + 17*2,y) - 96)>>8);
    1765           0 :                 r = av_clip_uintp2(r, 3);
    1766           0 :                 g = av_clip_uintp2(g, 3);
    1767           0 :                 b = av_clip_uintp2(b, 2);
    1768             :             } else {
    1769           0 :                 r = (((R >> 21) + X_DITHER(i,y)-256)>>8);
    1770           0 :                 g = (((G >> 19) + X_DITHER(i + 17,y)-256)>>8);
    1771           0 :                 b = (((B >> 21) + X_DITHER(i + 17*2,y)-256)>>8);
    1772           0 :                 r = av_clip_uintp2(r, 1);
    1773           0 :                 g = av_clip_uintp2(g, 2);
    1774           0 :                 b = av_clip_uintp2(b, 1);
    1775             :             }
    1776             : 
    1777           0 :             break;
    1778             :         }
    1779             : 
                               /* Pack the quantised components into a single output byte. */
    1780    40831985 :         if(target == AV_PIX_FMT_BGR4_BYTE) {
    1781     8166397 :             dest[0] = r + 2*g + 8*b;
    1782    32665588 :         } else if(target == AV_PIX_FMT_RGB4_BYTE) {
    1783     8166397 :             dest[0] = b + 2*g + 8*r;
    1784    24499191 :         } else if(target == AV_PIX_FMT_BGR8) {
    1785    16332794 :             dest[0] = r + 8*g + 64*b;
    1786     8166397 :         } else if(target == AV_PIX_FMT_RGB8) {
    1787     8166397 :             dest[0] = b + 4*g + 32*r;
    1788             :         } else
    1789             :             av_assert2(0);
    1790    40831985 :         break;}
    1791             :     }
    1792   110778895 : }
    1793             : 
                       /*
                        * Produce one output row of dstW pixels: for every pixel, accumulate
                        * lumFilterSize luma taps and chrFilterSize chroma taps (and, when hasAlpha
                        * is set, lumFilterSize alpha taps weighted by lumFilter), then hand the
                        * result to yuv2rgb_write_full(). Chroma is indexed with the same i as
                        * luma, i.e. one chroma sample is read per output pixel.
                        *
                        * dest advances by `step` bytes per pixel: 3 for RGB24/BGR24, 1 for
                        * BGR4_BYTE/RGB4_BYTE/BGR8/RGB8, otherwise 4. After the loop the remaining
                        * running error is stored in c->dither_error[0..2][dstW].
                        */
    1794             : static av_always_inline void
    1795        7656 : yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1796             :                           const int16_t **lumSrc, int lumFilterSize,
    1797             :                           const int16_t *chrFilter, const int16_t **chrUSrc,
    1798             :                           const int16_t **chrVSrc, int chrFilterSize,
    1799             :                           const int16_t **alpSrc, uint8_t *dest,
    1800             :                           int dstW, int y, enum AVPixelFormat target, int hasAlpha)
    1801             : {
    1802             :     int i;
    1803        7656 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1804        7656 :     int err[4] = {0};
    1805        7656 :     int A = 0; //init to silence warning
    1806             : 
    1807        7656 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1808        7456 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1809         500 :         step = 1;
    1810             : 
    1811     3633742 :     for (i = 0; i < dstW; i++) {
    1812             :         int j;
                               /* 1 << 9 is the rounding term for the >> 10 below; U and V start
                                * with an additional -(128 << 19) offset. */
    1813     3626086 :         int Y = 1<<9;
    1814     3626086 :         int U = (1<<9)-(128 << 19);
    1815     3626086 :         int V = (1<<9)-(128 << 19);
    1816             : 
    1817    20564230 :         for (j = 0; j < lumFilterSize; j++) {
    1818    16938144 :             Y += lumSrc[j][i] * lumFilter[j];
    1819             :         }
    1820    20564230 :         for (j = 0; j < chrFilterSize; j++) {
    1821    16938144 :             U += chrUSrc[j][i] * chrFilter[j];
    1822    16938144 :             V += chrVSrc[j][i] * chrFilter[j];
    1823             :         }
    1824     3626086 :         Y >>= 10;
    1825     3626086 :         U >>= 10;
    1826     3626086 :         V >>= 10;
    1827     3626086 :         if (hasAlpha) {
                                   /* 1 << 18 is the rounding term for the >> 19 below. */
    1828     3339386 :             A = 1 << 18;
    1829    17976930 :             for (j = 0; j < lumFilterSize; j++) {
    1830    14637544 :                 A += alpSrc[j][i] * lumFilter[j];
    1831             :             }
    1832     3339386 :             A >>= 19;
                                   /* Clip only when bit 8 is set. */
    1833     3339386 :             if (A & 0x100)
    1834           0 :                 A = av_clip_uint8(A);
    1835             :         }
    1836     3626086 :         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1837     3626086 :         dest += step;
    1838             :     }
                           /* i == dstW here. */
    1839        7656 :     c->dither_error[0][i] = err[0];
    1840        7656 :     c->dither_error[1][i] = err[1];
    1841        7656 :     c->dither_error[2][i] = err[2];
    1842        7656 : }
    1843             : 
                       /*
                        * Produce one output row of dstW pixels by blending two input lines:
                        * luma (and alpha, when hasAlpha is set) is weighted with (4096 - yalpha)
                        * for line 0 and yalpha for line 1; chroma uses uvalpha the same way. Each
                        * blended pixel is passed to yuv2rgb_write_full().
                        *
                        * abuf is only dereferenced when hasAlpha is non-zero. dest advances by
                        * `step` bytes per pixel: 3 for RGB24/BGR24, 1 for
                        * BGR4_BYTE/RGB4_BYTE/BGR8/RGB8, otherwise 4. After the loop the remaining
                        * running error is stored in c->dither_error[0..2][dstW].
                        */
    1844             : static av_always_inline void
    1845           0 : yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
    1846             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1847             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
    1848             :                      int yalpha, int uvalpha, int y,
    1849             :                      enum AVPixelFormat target, int hasAlpha)
    1850             : {
    1851           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    1852           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1853           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1854           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    1855           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1856           0 :     int  yalpha1 = 4096 - yalpha;
    1857           0 :     int uvalpha1 = 4096 - uvalpha;
    1858             :     int i;
    1859           0 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1860           0 :     int err[4] = {0};
    1861           0 :     int A = 0; // init to silence warning
    1862             : 
    1863             :     av_assert2(yalpha  <= 4096U);
    1864             :     av_assert2(uvalpha <= 4096U);
    1865             : 
    1866           0 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1867           0 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1868           0 :         step = 1;
    1869             : 
    1870           0 :     for (i = 0; i < dstW; i++) {
    1871           0 :         int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha             ) >> 10; //FIXME rounding
    1872           0 :         int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10;
    1873           0 :         int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10;
    1874             : 
    1875           0 :         if (hasAlpha) {
                                   /* 1 << 18 rounds the >> 19; clip only when bit 8 is set. */
    1876           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19;
    1877           0 :             if (A & 0x100)
    1878           0 :                 A = av_clip_uint8(A);
    1879             :         }
    1880             : 
    1881           0 :         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1882           0 :         dest += step;
    1883             :     }
                           /* i == dstW here. */
    1884           0 :     c->dither_error[0][i] = err[0];
    1885           0 :     c->dither_error[1][i] = err[1];
    1886           0 :     c->dither_error[2][i] = err[2];
    1887           0 : }
    1888             : 
                       /*
                        * Produce one output row of dstW pixels from a single luma line buf0.
                        *
                        * When uvalpha < 2048 only the first chroma line (ubuf[0]/vbuf[0]) is used;
                        * otherwise the two chroma lines are summed. Each pixel is passed to
                        * yuv2rgb_write_full(). abuf0 is only dereferenced when hasAlpha is
                        * non-zero. dest advances by `step` bytes per pixel: 3 for RGB24/BGR24,
                        * 1 for BGR4_BYTE/RGB4_BYTE/BGR8/RGB8, otherwise 4. After the loop the
                        * remaining running error is stored in c->dither_error[0..2][dstW].
                        *
                        * The scaling of Y, U and V uses multiplication rather than `<<` because
                        * the operands can be negative and left-shifting a negative value is
                        * undefined behaviour in C.
                        */
    1889             : static av_always_inline void
    1890      387713 : yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
    1891             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1892             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
    1893             :                      int uvalpha, int y, enum AVPixelFormat target,
    1894             :                      int hasAlpha)
    1895             : {
    1896      387713 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1897             :     int i;
    1898      387713 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1899      387713 :     int err[4] = {0};
    1900             : 
    1901      387713 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1902      312631 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1903      187705 :         step = 1;
    1904             : 
    1905      387713 :     if (uvalpha < 2048) {
    1906      387713 :         int A = 0; //init to silence warning
    1907   107540522 :         for (i = 0; i < dstW; i++) {
    1908   107152809 :             int Y = buf0[i] * 4;
    1909   107152809 :             int U = (ubuf0[i] - (128<<7)) * 4;
    1910   107152809 :             int V = (vbuf0[i] - (128<<7)) * 4;
    1911             : 
    1912   107152809 :             if (hasAlpha) {
                                       /* + 64 rounds the >> 7; clip only when bit 8 is set. */
    1913           0 :                 A = (abuf0[i] + 64) >> 7;
    1914           0 :                 if (A & 0x100)
    1915           0 :                     A = av_clip_uint8(A);
    1916             :             }
    1917             : 
    1918   107152809 :             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1919   107152809 :             dest += step;
    1920             :         }
    1921             :     } else {
    1922           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1923           0 :         int A = 0; //init to silence warning
    1924           0 :         for (i = 0; i < dstW; i++) {
    1925           0 :             int Y = buf0[i] * 4;
                                   /* Sum of two chroma lines, hence the doubled bias and * 2. */
    1926           0 :             int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2;
    1927           0 :             int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2;
    1928             : 
    1929           0 :             if (hasAlpha) {
    1930           0 :                 A = (abuf0[i] + 64) >> 7;
    1931           0 :                 if (A & 0x100)
    1932           0 :                     A = av_clip_uint8(A);
    1933             :             }
    1934             : 
    1935           0 :             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1936           0 :             dest += step;
    1937             :         }
    1938             :     }
    1939             : 
                           /* i == dstW here. */
    1940      387713 :     c->dither_error[0][i] = err[0];
    1941      387713 :     c->dither_error[1][i] = err[1];
    1942      387713 :     c->dither_error[2][i] = err[2];
    1943      387713 : }
    1944             : 
                       /*
                        * Instantiate the _X_c/_2_c/_1_c wrappers on top of the
                        * yuv2rgb_full_*_c_template functions.
                        *
                        * With CONFIG_SMALL one variant per 32-bit format is generated and hasAlpha
                        * is the run-time expression `CONFIG_SWSCALE_ALPHA && c->needAlpha`.
                        * Otherwise the alpha-carrying variants (hasAlpha == 1, only if
                        * CONFIG_SWSCALE_ALPHA) and the "x" variants (hasAlpha == 0) are generated
                        * separately for the same four pixel formats.
                        */
    1945             : #if CONFIG_SMALL
    1946             : YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1947             : YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1948             : YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1949             : YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1950             : #else
    1951             : #if CONFIG_SWSCALE_ALPHA
    1952        5366 : YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
    1953         200 : YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
    1954         200 : YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
    1955         200 : YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
    1956             : #endif
    1957       63900 : YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
    1958           0 : YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
    1959           0 : YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
    1960           0 : YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
    1961             : #endif
                       /* 24-bit outputs: 3 bytes per pixel, no alpha. */
    1962       89900 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
    1963       47398 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)
    1964             : 
                       /* One-byte-per-pixel outputs; these go through the dithering path. */
    1965       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
    1966       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
    1967       75282 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
    1968       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
    1969             : 
                       /*
                        * Produce one row of planar G/B/R(/A) output: for each of dstW pixels,
                        * accumulate lumFilterSize luma taps and chrFilterSize chroma taps, convert
                        * to R/G/B with the coefficients in c, and store G in dest[0], B in dest[1],
                        * R in dest[2] and, when alpha is written, A in dest[3].
                        *
                        * Alpha is written only if the destination format descriptor has
                        * AV_PIX_FMT_FLAG_ALPHA set and alpSrc is non-NULL.
                        *
                        * SH = 22 + 8 - depth of component 0. SH == 22 (depth 8) selects the 8-bit
                        * store path; any other value stores 16-bit words through dest16 and
                        * byte-swaps them afterwards when the endianness of c->dstFormat differs
                        * from HAVE_BIGENDIAN. The parameter y is not used.
                        */
    1970             : static void
    1971      195948 : yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
    1972             :                   const int16_t **lumSrc, int lumFilterSize,
    1973             :                   const int16_t *chrFilter, const int16_t **chrUSrc,
    1974             :                   const int16_t **chrVSrc, int chrFilterSize,
    1975             :                   const int16_t **alpSrc, uint8_t **dest,
    1976             :                   int dstW, int y)
    1977             : {
    1978      195948 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    1979             :     int i;
    1980      195948 :     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
    1981      195948 :     uint16_t **dest16 = (uint16_t**)dest;
    1982      195948 :     int SH = 22 + 8 - desc->comp[0].depth;
    1983      195948 :     int A = 0; // init to silence warning
    1984             : 
    1985    68849100 :     for (i = 0; i < dstW; i++) {
    1986             :         int j;
                               /* 1 << 9 is the rounding term for the >> 10 below; U and V start
                                * with an additional -(128 << 19) offset. */
    1987    68653152 :         int Y = 1 << 9;
    1988    68653152 :         int U = (1 << 9) - (128 << 19);
    1989    68653152 :         int V = (1 << 9) - (128 << 19);
    1990             :         int R, G, B;
    1991             : 
    1992   140386304 :         for (j = 0; j < lumFilterSize; j++)
    1993    71733152 :             Y += lumSrc[j][i] * lumFilter[j];
    1994             : 
    1995   345368576 :         for (j = 0; j < chrFilterSize; j++) {
    1996   276715424 :             U += chrUSrc[j][i] * chrFilter[j];
    1997   276715424 :             V += chrVSrc[j][i] * chrFilter[j];
    1998             :         }
    1999             : 
    2000    68653152 :         Y >>= 10;
    2001    68653152 :         U >>= 10;
    2002    68653152 :         V >>= 10;
    2003             : 
    2004    68653152 :         if (hasAlpha) {
    2005      145728 :             A = 1 << 18;
    2006             : 
    2007     1391456 :             for (j = 0; j < lumFilterSize; j++)
    2008     1245728 :                 A += alpSrc[j][i] * lumFilter[j];
    2009             : 
                                   /* Keep A as an unshifted 27-bit value; it is shifted when stored. */
    2010      145728 :             if (A & 0xF8000000)
    2011       40000 :                 A =  av_clip_uintp2(A, 27);
    2012             :         }
    2013             : 
    2014    68653152 :         Y -= c->yuv2rgb_y_offset;
    2015    68653152 :         Y *= c->yuv2rgb_y_coeff;
                               /* Rounding term for the >> SH applied when storing. */
    2016    68653152 :         Y += 1 << (SH-1);
    2017    68653152 :         R = Y + V * c->yuv2rgb_v2r_coeff;
    2018    68653152 :         G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    2019    68653152 :         B = Y +                            U * c->yuv2rgb_u2b_coeff;
    2020             : 
                               /* Clip to 30 bits only if some component is negative or >= 1 << 30. */
    2021    68653152 :         if ((R | G | B) & 0xC0000000) {
    2022    13258228 :             R = av_clip_uintp2(R, 30);
    2023    13258228 :             G = av_clip_uintp2(G, 30);
    2024    13258228 :             B = av_clip_uintp2(B, 30);
    2025             :         }
    2026             : 
    2027    68653152 :         if (SH != 22) {
    2028    22339968 :             dest16[0][i] = G >> SH;
    2029    22339968 :             dest16[1][i] = B >> SH;
    2030    22339968 :             dest16[2][i] = R >> SH;
    2031    22339968 :             if (hasAlpha)
                                       /* A has 27 significant bits versus 30 for R/G/B, hence SH - 3. */
    2032       80000 :                 dest16[3][i] = A >> (SH - 3);
    2033             :         } else {
    2034    46313184 :             dest[0][i] = G >> 22;
    2035    46313184 :             dest[1][i] = B >> 22;
    2036    46313184 :             dest[2][i] = R >> 22;
    2037    46313184 :             if (hasAlpha)
    2038       65728 :                 dest[3][i] = A >> 19;
    2039             :         }
    2040             :     }
                           /* Values were stored in host byte order; swap if dstFormat's differs. */
    2041      195948 :     if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
    2042     9270360 :         for (i = 0; i < dstW; i++) {
    2043     9243840 :             dest16[0][i] = av_bswap16(dest16[0][i]);
    2044     9243840 :             dest16[1][i] = av_bswap16(dest16[1][i]);
    2045     9243840 :             dest16[2][i] = av_bswap16(dest16[2][i]);
    2046     9243840 :             if (hasAlpha)
    2047       40000 :                 dest16[3][i] = av_bswap16(dest16[3][i]);
    2048             :         }
    2049             :     }
    2050      195948 : }
    2051             : 
                       /*
                        * Produce one row of 16-bit planar G/B/R(/A) output. The source line
                        * pointers are declared as int16_t ** but are reinterpreted here as arrays
                        * of int32_t samples. For each of dstW pixels the luma, chroma and
                        * (optionally) alpha taps are accumulated, converted to R/G/B with the
                        * coefficients in c, clipped to 30 bits and stored as value >> 14 with G in
                        * dest[0], B in dest[1], R in dest[2] and A in dest[3].
                        *
                        * Alpha is written only if the destination format descriptor has
                        * AV_PIX_FMT_FLAG_ALPHA set and alpSrcx is non-NULL. The stored words are
                        * byte-swapped afterwards when the endianness of c->dstFormat differs from
                        * HAVE_BIGENDIAN. The parameter y is not used.
                        *
                        * All tap accumulations multiply by an unsigned filter coefficient so that
                        * the arithmetic wraps instead of triggering signed-overflow undefined
                        * behaviour; the bias constants (-0x40000000, -(128 << 23)) are part of
                        * that wrapping scheme and must not be reordered.
                        */
    2052             : static void
    2053       47992 : yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
    2054             :                     const int16_t **lumSrcx, int lumFilterSize,
    2055             :                     const int16_t *chrFilter, const int16_t **chrUSrcx,
    2056             :                     const int16_t **chrVSrcx, int chrFilterSize,
    2057             :                     const int16_t **alpSrcx, uint8_t **dest,
    2058             :                     int dstW, int y)
    2059             : {
    2060       47992 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    2061             :     int i;
    2062       47992 :     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
    2063       47992 :     uint16_t **dest16 = (uint16_t**)dest;
    2064       47992 :     const int32_t **lumSrc  = (const int32_t**)lumSrcx;
    2065       47992 :     const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
    2066       47992 :     const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
    2067       47992 :     const int32_t **alpSrc  = (const int32_t**)alpSrcx;
    2068             : 
    2069    16779576 :     for (i = 0; i < dstW; i++) {
    2070             :         int j;
    2071    16731584 :         int Y = -0x40000000;
    2072    16731584 :         int U = -(128 << 23);
    2073    16731584 :         int V = -(128 << 23);
    2074             :         int R, G, B, A;
    2075             : 
    2076    34343168 :         for (j = 0; j < lumFilterSize; j++)
    2077    17611584 :             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
    2078             : 
    2079    84220160 :         for (j = 0; j < chrFilterSize; j++) {
    2080    67488576 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    2081    67488576 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    2082             :         }
    2083             : 
    2084    16731584 :         Y >>= 14;
    2085    16731584 :         Y += 0x10000;
    2086    16731584 :         U >>= 14;
    2087    16731584 :         V >>= 14;
    2088             : 
    2089    16731584 :         if (hasAlpha) {
    2090       52960 :             A = -0x40000000;
    2091             : 
    2092      545920 :             for (j = 0; j < lumFilterSize; j++)
                                       /* Unsigned multiply, as for Y/U/V above, to avoid signed overflow. */
    2093      492960 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    2094             : 
    2095       52960 :             A >>= 1;
    2096       52960 :             A += 0x20002000;
    2097             :         }
    2098             : 
    2099    16731584 :         Y -= c->yuv2rgb_y_offset;
    2100    16731584 :         Y *= c->yuv2rgb_y_coeff;
                               /* 1 << 13 is the rounding term for the >> 14 applied when storing. */
    2101    16731584 :         Y += 1 << 13;
    2102    16731584 :         R = V * c->yuv2rgb_v2r_coeff;
    2103    16731584 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    2104    16731584 :         B =                            U * c->yuv2rgb_u2b_coeff;
    2105             : 
    2106    16731584 :         R = av_clip_uintp2(Y + R, 30);
    2107    16731584 :         G = av_clip_uintp2(Y + G, 30);
    2108    16731584 :         B = av_clip_uintp2(Y + B, 30);
    2109             : 
    2110    16731584 :         dest16[0][i] = G >> 14;
    2111    16731584 :         dest16[1][i] = B >> 14;
    2112    16731584 :         dest16[2][i] = R >> 14;
    2113    16731584 :         if (hasAlpha)
    2114       52960 :             dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
    2115             :     }
                           /* Values were stored in host byte order; swap if dstFormat's differs. */
    2116       47992 :     if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
    2117    13282800 :         for (i = 0; i < dstW; i++) {
    2118    13244800 :             dest16[0][i] = av_bswap16(dest16[0][i]);
    2119    13244800 :             dest16[1][i] = av_bswap16(dest16[1][i]);
    2120    13244800 :             dest16[2][i] = av_bswap16(dest16[2][i]);
    2121    13244800 :             if (hasAlpha)
    2122       32960 :                 dest16[3][i] = av_bswap16(dest16[3][i]);
    2123             :         }
    2124             :     }
    2125       47992 : }
    2126             : 
    /*
     * Write one line of interleaved 8-bit luma + alpha from a single
     * source line (no vertical filtering).
     *
     * For each of the dstW pixels two bytes are stored in dest:
     *   dest[2*i]     = (buf0[i] + 64) >> 7, passed through av_clip_uint8()
     *   dest[2*i + 1] = alpha derived the same way from abuf0[i], or 255
     *                   when abuf0 is NULL
     *
     * c, ubuf, vbuf, uvalpha and y are not used by this function; they are
     * presumably present to match a shared function-pointer signature
     * (NOTE(review): confirm against the yuv2packed1_fn typedef).
     */
    2127             : static void
    2128           0 : yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
    2129             :             const int16_t *ubuf[2], const int16_t *vbuf[2],
    2130             :             const int16_t *abuf0, uint8_t *dest, int dstW,
    2131             :             int uvalpha, int y)
    2132             : {
    2133           0 :     int hasAlpha = !!abuf0;
    2134             :     int i;
    2135             : 
    2136           0 :     for (i = 0; i < dstW; i++) {
                           /* +64 is half of 1 << 7, i.e. round-to-nearest for the shift. */
    2137           0 :         int Y = (buf0[i] + 64) >> 7;
                           /* A is only assigned and only read when hasAlpha is set. */
    2138             :         int A;
    2139             : 
    2140           0 :         Y = av_clip_uint8(Y);
    2141             : 
    2142           0 :         if (hasAlpha) {
    2143           0 :             A = (abuf0[i] + 64) >> 7;
                               /* Unlike Y above, alpha is only sent through
                                * av_clip_uint8() when bit 8 of the value is set;
                                * otherwise it is stored as-is (truncated to 8 bits
                                * by the uint8_t store below). */
    2144           0 :             if (A & 0x100)
    2145           0 :                 A = av_clip_uint8(A);
    2146             :         }
    2147             : 
    2148           0 :         dest[i * 2    ] = Y;
    2149           0 :         dest[i * 2 + 1] = hasAlpha ? A : 255;
    2150             :     }
    2151           0 : }
    2152             : 
    /*
     * Write one line of interleaved 8-bit luma + alpha by blending two
     * source lines.
     *
     * Each output sample is (line0 * (4096 - yalpha) + line1 * yalpha) >> 19,
     * passed through av_clip_uint8(). Alpha is blended with the same
     * weights when abuf and both of its entries are non-NULL; otherwise
     * the alpha byte is written as 255.
     *
     * c, ubuf, vbuf, uvalpha and y are not used by this function; they are
     * presumably present to match a shared function-pointer signature
     * (NOTE(review): confirm against the yuv2packed2_fn typedef).
     */
    2153             : static void
    2154           0 : yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2],
    2155             :             const int16_t *ubuf[2], const int16_t *vbuf[2],
    2156             :             const int16_t *abuf[2], uint8_t *dest, int dstW,
    2157             :             int yalpha, int uvalpha, int y)
    2158             : {
    2159           0 :     int hasAlpha = abuf && abuf[0] && abuf[1];
    2160           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    2161           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    2162           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
                       /* The two weights always sum to 4096 (1 << 12). */
    2163           0 :     int  yalpha1 = 4096 - yalpha;
    2164             :     int i;
    2165             : 
                       /* Comparing against an unsigned constant converts yalpha to
                        * unsigned, so a negative yalpha fails this check as well.
                        * av_assert2 is defined elsewhere; whether it is compiled in
                        * is not visible from here. */
    2166             :     av_assert2(yalpha  <= 4096U);
    2167             : 
    2168           0 :     for (i = 0; i < dstW; i++) {
                           /* >> 19 removes the 12 weight bits plus 7 more; no
                            * rounding term is added before the shift. */
    2169           0 :         int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19;
                           /* A is only assigned and only read when hasAlpha is set. */
    2170             :         int A;
    2171             : 
    2172           0 :         Y = av_clip_uint8(Y);
    2173             : 
    2174           0 :         if (hasAlpha) {
    2175           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
    2176           0 :             A = av_clip_uint8(A);
    2177             :         }
    2178             : 
    2179           0 :         dest[i * 2    ] = Y;
    2180           0 :         dest[i * 2 + 1] = hasAlpha ? A : 255;
    2181             :     }
    2182           0 : }
    2183             : 
    /*
     * Write one line of interleaved 8-bit luma + alpha using a vertical
     * filter of lumFilterSize taps.
     *
     * For each of the dstW pixels:
     *   Y = ((1 << 18) + sum_j lumSrc[j][i] * lumFilter[j]) >> 19
     *   A = the same expression over alpSrc[j][i], using lumFilter as well
     * dest[2*i] receives Y and dest[2*i + 1] receives A, or 255 when alpSrc
     * is NULL.
     *
     * c, chrFilter, chrUSrc, chrVSrc, chrFilterSize and y are not used by
     * this function; they are presumably present to match a shared
     * function-pointer signature (NOTE(review): confirm against the
     * yuv2packedX_fn typedef).
     */
    2184             : static void
    2185        4708 : yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter,
    2186             :             const int16_t **lumSrc, int lumFilterSize,
    2187             :             const int16_t *chrFilter, const int16_t **chrUSrc,
    2188             :             const int16_t **chrVSrc, int chrFilterSize,
    2189             :             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
    2190             : {
    2191        4708 :     int hasAlpha = !!alpSrc;
    2192             :     int i;
    2193             : 
    2194     1646724 :     for (i = 0; i < dstW; i++) {
    2195             :         int j;
                           /* 1 << 18 is half of 1 << 19: rounding bias for the
                            * >> 19 applied after accumulation. */
    2196     1642016 :         int Y = 1 << 18, A = 1 << 18;
    2197             : 
    2198     3504032 :         for (j = 0; j < lumFilterSize; j++)
    2199     1862016 :             Y += lumSrc[j][i] * lumFilter[j];
    2200             : 
    2201     1642016 :         Y >>= 19;
                           /* av_clip_uint8() is only called when bit 8 is set;
                            * any other value is stored as-is (truncated to 8 bits
                            * by the uint8_t store below). */
    2202     1642016 :         if (Y  & 0x100)
    2203        9140 :             Y = av_clip_uint8(Y);
    2204             : 
    2205     1642016 :         if (hasAlpha) {
                               /* Alpha is filtered with the luma coefficients. */
    2206      260000 :             for (j = 0; j < lumFilterSize; j++)
    2207      240000 :                 A += alpSrc[j][i] * lumFilter[j];
    2208             : 
    2209       20000 :             A >>= 19;
    2210             : 
    2211       20000 :             if (A & 0x100)
    2212           0 :                 A = av_clip_uint8(A);
    2213             :         }
    2214             : 
    2215     1642016 :         dest[2 * i    ] = Y;
    2216     1642016 :         dest[2 * i + 1] = hasAlpha ? A : 255;
    2217             :     }
    2218        4708 : }
    2219             : 
    /*
     * Write one line of packed 16-bit-per-component A, Y, U, V samples
     * using vertical filters.
     *
     * The int16_t ** source arguments are cast to int32_t **, so samples
     * are read as 32-bit values. Y and A are accumulated over lumFilterSize
     * taps of lumFilter; U and V over chrFilterSize taps of chrFilter.
     * Each pixel occupies 8 bytes of dest, written with AV_WL16 in the
     * order A, Y, U, V at byte offsets 0, 2, 4, 6 (AV_WL16 is defined
     * elsewhere; presumably a little-endian 16-bit store, matching the
     * "le" in the function name). When _alpSrc is NULL the alpha slot is
     * written as 65535.
     *
     * c and y are not used by this function.
     */
    2220             : static void
    2221        4420 : yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
    2222             :                  const int16_t **_lumSrc, int lumFilterSize,
    2223             :                  const int16_t *chrFilter, const int16_t **_chrUSrc,
    2224             :                  const int16_t **_chrVSrc, int chrFilterSize,
    2225             :                  const int16_t **_alpSrc, uint8_t *dest, int dstW, int y)
    2226             : {
    2227        4420 :     const int32_t **lumSrc  = (const int32_t **) _lumSrc,
    2228        4420 :                   **chrUSrc = (const int32_t **) _chrUSrc,
    2229        4420 :                   **chrVSrc = (const int32_t **) _chrVSrc,
    2230        4420 :                   **alpSrc  = (const int32_t **) _alpSrc;
    2231        4420 :     int hasAlpha = !!alpSrc;
    2232             :     int i;
    2233             : 
    2234     1545060 :     for (i = 0; i < dstW; i++) {
                           /* 1 << 14 is half of 1 << 15: rounding bias for the
                            * >> 15 applied after accumulation. */
    2235     1540640 :         int Y = 1 << 14, U = 1 << 14;
    2236     1540640 :         int V = 1 << 14, A = 1 << 14;
    2237             :         int j;
    2238             : 
                           /* 0x40000000 >> 15 == 0x8000, so this bias is exactly
                            * cancelled by the "0x8000 +" after the shift below. It
                            * moves the accumulator into signed range so that
                            * av_clip_int16() bounds the final value to 0..0xFFFF. */
    2239     1540640 :         Y -= 0x40000000;
    2240     1540640 :         U -= 0x40000000;
    2241     1540640 :         V -= 0x40000000;
    2242     1540640 :         A -= 0x40000000;
    2243             : 
                           /* The (unsigned) cast makes the multiply-accumulate use
                            * unsigned arithmetic, which wraps on overflow instead of
                            * being undefined as signed overflow would be. */
    2244     3301280 :         for (j = 0; j < lumFilterSize; j++)
    2245     1760640 :             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
    2246             : 
    2247     7863200 :         for (j = 0; j < chrFilterSize; j++)
    2248     6322560 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    2249             : 
    2250     7863200 :         for (j = 0; j < chrFilterSize; j++)
    2251     6322560 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    2252             : 
                           /* Alpha is filtered with the luma coefficients. */
    2253     1540640 :         if (hasAlpha)
    2254      260000 :             for (j = 0; j < lumFilterSize; j++)
    2255      240000 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    2256             : 
    2257     1540640 :         Y = 0x8000 + av_clip_int16(Y >> 15);
    2258     1540640 :         U = 0x8000 + av_clip_int16(U >> 15);
    2259     1540640 :         V = 0x8000 + av_clip_int16(V >> 15);
                           /* Computed unconditionally, but only stored when
                            * hasAlpha is set. */
    2260     1540640 :         A = 0x8000 + av_clip_int16(A >> 15);
    2261             : 
    2262     1540640 :         AV_WL16(dest + 8 * i, hasAlpha ? A : 65535);
    2263     1540640 :         AV_WL16(dest + 8 * i + 2, Y);
    2264     1540640 :         AV_WL16(dest + 8 * i + 4, U);
    2265     1540640 :         AV_WL16(dest + 8 * i + 6, V);
    2266             :     }
    2267        4420 : }
    2268             : 
    2269       64742 : av_cold void ff_sws_init_output_funcs(SwsContext *c,
    2270             :                                       yuv2planar1_fn *yuv2plane1,
    2271             :                                       yuv2planarX_fn *yuv2planeX,
    2272             :                                       yuv2interleavedX_fn *yuv2nv12cX,
    2273             :                                       yuv2packed1_fn *yuv2packed1,
    2274             :                                       yuv2packed2_fn *yuv2packed2,
    2275             :                                       yuv2packedX_fn *yuv2packedX,
    2276             :                                       yuv2anyX_fn *yuv2anyX)
    2277             : {
    2278       64742 :     enum AVPixelFormat dstFormat = c->dstFormat;
    2279       64742 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
    2280             : 
    2281       64742 :     if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
    2282          44 :         *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
    2283          44 :         *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
    2284          44 :         *yuv2nv12cX = yuv2p010cX_c;
    2285       64698 :     } else if (is16BPS(dstFormat)) {
    2286        3366 :         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
    2287        3366 :         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
    2288        3366 :         if (dstFormat == AV_PIX_FMT_P016LE || dstFormat == AV_PIX_FMT_P016BE) {
    2289          44 :           *yuv2nv12cX = yuv2p016cX_c;
    2290             :         }
    2291       61332 :     } else if (isNBPS(dstFormat)) {
    2292        6173 :         if (desc->comp[0].depth == 9) {
    2293         588 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
    2294         588 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
    2295        5585 :         } else if (desc->comp[0].depth == 10) {
    2296        4474 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
    2297        4474 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
    2298        1111 :         } else if (desc->comp[0].depth == 12) {
    2299         726 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
    2300         726 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
    2301         385 :         } else if (desc->comp[0].depth == 14) {
    2302         385 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c  : yuv2planeX_14LE_c;
    2303         385 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
    2304             :         } else
    2305           0 :             av_assert0(0);
    2306             :     } else {
    2307       55159 :         *yuv2plane1 = yuv2plane1_8_c;
    2308       55159 :         *yuv2planeX = yuv2planeX_8_c;
    2309       55159 :         if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
    2310           6 :             *yuv2nv12cX = yuv2nv12cX_c;
    2311             :     }
    2312             : 
    2313       64742 :     if(c->flags & SWS_FULL_CHR_H_INT) {
    2314        5810 :         switch (dstFormat) {
    2315           6 :             case AV_PIX_FMT_RGBA:
    2316             : #if CONFIG_SMALL
    2317             :                 *yuv2packedX = yuv2rgba32_full_X_c;
    2318             :                 *yuv2packed2 = yuv2rgba32_full_2_c;
    2319             :                 *yuv2packed1 = yuv2rgba32_full_1_c;
    2320             : #else
    2321             : #if CONFIG_SWSCALE_ALPHA
    2322           6 :                 if (c->needAlpha) {
    2323           6 :                     *yuv2packedX = yuv2rgba32_full_X_c;
    2324           6 :                     *yuv2packed2 = yuv2rgba32_full_2_c;
    2325           6 :                     *yuv2packed1 = yuv2rgba32_full_1_c;
    2326             :                 } else
    2327             : #endif /* CONFIG_SWSCALE_ALPHA */
    2328             :                 {
    2329           0 :                     *yuv2packedX = yuv2rgbx32_full_X_c;
    2330           0 :                     *yuv2packed2 = yuv2rgbx32_full_2_c;
    2331           0 :                     *yuv2packed1 = yuv2rgbx32_full_1_c;
    2332             :                 }
    2333             : #endif /* !CONFIG_SMALL */
    2334           6 :                 break;
    2335           6 :             case AV_PIX_FMT_ARGB:
    2336             : #if CONFIG_SMALL
    2337             :                 *yuv2packedX = yuv2argb32_full_X_c;
    2338             :                 *yuv2packed2 = yuv2argb32_full_2_c;
    2339             :                 *yuv2packed1 = yuv2argb32_full_1_c;
    2340             : #else
    2341             : #if CONFIG_SWSCALE_ALPHA
    2342           6 :                 if (c->needAlpha) {
    2343           6 :                     *yuv2packedX = yuv2argb32_full_X_c;
    2344           6 :                     *yuv2packed2 = yuv2argb32_full_2_c;
    2345           6 :                     *yuv2packed1 = yuv2argb32_full_1_c;
    2346             :                 } else
    2347             : #endif /* CONFIG_SWSCALE_ALPHA */
    2348             :                 {
    2349           0 :                     *yuv2packedX = yuv2xrgb32_full_X_c;
    2350           0 :                     *yuv2packed2 = yuv2xrgb32_full_2_c;
    2351           0 :                     *yuv2packed1 = yuv2xrgb32_full_1_c;
    2352             :                 }
    2353             : #endif /* !CONFIG_SMALL */
    2354           6 :                 break;
    2355         739 :             case AV_PIX_FMT_BGRA:
    2356             : #if CONFIG_SMALL
    2357             :                 *yuv2packedX = yuv2bgra32_full_X_c;
    2358             :                 *yuv2packed2 = yuv2bgra32_full_2_c;
    2359             :                 *yuv2packed1 = yuv2bgra32_full_1_c;
    2360             : #else
    2361             : #if CONFIG_SWSCALE_ALPHA
    2362         739 :                 if (c->needAlpha) {
    2363          24 :                     *yuv2packedX = yuv2bgra32_full_X_c;
    2364          24 :                     *yuv2packed2 = yuv2bgra32_full_2_c;
    2365          24 :                     *yuv2packed1 = yuv2bgra32_full_1_c;
    2366             :                 } else
    2367             : #endif /* CONFIG_SWSCALE_ALPHA */
    2368             :                 {
    2369         715 :                     *yuv2packedX = yuv2bgrx32_full_X_c;
    2370         715 :                     *yuv2packed2 = yuv2bgrx32_full_2_c;
    2371         715 :                     *yuv2packed1 = yuv2bgrx32_full_1_c;
    2372             :                 }
    2373             : #endif /* !CONFIG_SMALL */
    2374         739 :                 break;
    2375           6 :             case AV_PIX_FMT_ABGR:
    2376             : #if CONFIG_SMALL
    2377             :                 *yuv2packedX = yuv2abgr32_full_X_c;
    2378             :                 *yuv2packed2 = yuv2abgr32_full_2_c;
    2379             :                 *yuv2packed1 = yuv2abgr32_full_1_c;
    2380             : #else
    2381             : #if CONFIG_SWSCALE_ALPHA
    2382           6 :                 if (c->needAlpha) {
    2383           6 :                     *yuv2packedX = yuv2abgr32_full_X_c;
    2384           6 :                     *yuv2packed2 = yuv2abgr32_full_2_c;
    2385           6 :                     *yuv2packed1 = yuv2abgr32_full_1_c;
    2386             :                 } else
    2387             : #endif /* CONFIG_SWSCALE_ALPHA */
    2388             :                 {
    2389           0 :                     *yuv2packedX = yuv2xbgr32_full_X_c;
    2390           0 :                     *yuv2packed2 = yuv2xbgr32_full_2_c;
    2391           0 :                     *yuv2packed1 = yuv2xbgr32_full_1_c;
    2392             :                 }
    2393             : #endif /* !CONFIG_SMALL */
    2394           6 :                 break;
    2395           3 :         case AV_PIX_FMT_RGBA64LE:
    2396             : #if CONFIG_SWSCALE_ALPHA
    2397           3 :             if (c->needAlpha) {
    2398           3 :                 *yuv2packedX = yuv2rgba64le_full_X_c;
    2399           3 :                 *yuv2packed2 = yuv2rgba64le_full_2_c;
    2400           3 :                 *yuv2packed1 = yuv2rgba64le_full_1_c;
    2401             :             } else
    2402             : #endif /* CONFIG_SWSCALE_ALPHA */
    2403             :             {
    2404           0 :                 *yuv2packedX = yuv2rgbx64le_full_X_c;
    2405           0 :                 *yuv2packed2 = yuv2rgbx64le_full_2_c;
    2406           0 :                 *yuv2packed1 = yuv2rgbx64le_full_1_c;
    2407             :             }
    2408           3 :             break;
    2409           3 :         case AV_PIX_FMT_RGBA64BE:
    2410             : #if CONFIG_SWSCALE_ALPHA
    2411           3 :             if (c->needAlpha) {
    2412           3 :                 *yuv2packedX = yuv2rgba64be_full_X_c;
    2413           3 :                 *yuv2packed2 = yuv2rgba64be_full_2_c;
    2414           3 :                 *yuv2packed1 = yuv2rgba64be_full_1_c;
    2415             :             } else
    2416             : #endif /* CONFIG_SWSCALE_ALPHA */
    2417             :             {
    2418           0 :                 *yuv2packedX = yuv2rgbx64be_full_X_c;
    2419           0 :                 *yuv2packed2 = yuv2rgbx64be_full_2_c;
    2420           0 :                 *yuv2packed1 = yuv2rgbx64be_full_1_c;
    2421             :             }
    2422           3 :             break;
    2423           3 :         case AV_PIX_FMT_BGRA64LE:
    2424             : #if CONFIG_SWSCALE_ALPHA
    2425           3 :             if (c->needAlpha) {
    2426           3 :                 *yuv2packedX = yuv2bgra64le_full_X_c;
    2427           3 :                 *yuv2packed2 = yuv2bgra64le_full_2_c;
    2428           3 :                 *yuv2packed1 = yuv2bgra64le_full_1_c;
    2429             :             } else
    2430             : #endif /* CONFIG_SWSCALE_ALPHA */
    2431             :             {
    2432           0 :                 *yuv2packedX = yuv2bgrx64le_full_X_c;
    2433           0 :                 *yuv2packed2 = yuv2bgrx64le_full_2_c;
    2434           0 :                 *yuv2packed1 = yuv2bgrx64le_full_1_c;
    2435             :             }
    2436           3 :             break;
    2437           3 :         case AV_PIX_FMT_BGRA64BE:
    2438             : #if CONFIG_SWSCALE_ALPHA
    2439           3 :             if (c->needAlpha) {
    2440           3 :                 *yuv2packedX = yuv2bgra64be_full_X_c;
    2441           3 :                 *yuv2packed2 = yuv2bgra64be_full_2_c;
    2442           3 :                 *yuv2packed1 = yuv2bgra64be_full_1_c;
    2443             :             } else
    2444             : #endif /* CONFIG_SWSCALE_ALPHA */
    2445             :             {
    2446           0 :                 *yuv2packedX = yuv2bgrx64be_full_X_c;
    2447           0 :                 *yuv2packed2 = yuv2bgrx64be_full_2_c;
    2448           0 :                 *yuv2packed1 = yuv2bgrx64be_full_1_c;
    2449             :             }
    2450           3 :             break;
    2451             : 
    2452         460 :         case AV_PIX_FMT_RGB24:
    2453         460 :             *yuv2packedX = yuv2rgb24_full_X_c;
    2454         460 :             *yuv2packed2 = yuv2rgb24_full_2_c;
    2455         460 :             *yuv2packed1 = yuv2rgb24_full_1_c;
    2456         460 :             break;
    2457         811 :         case AV_PIX_FMT_BGR24:
    2458         811 :             *yuv2packedX = yuv2bgr24_full_X_c;
    2459         811 :             *yuv2packed2 = yuv2bgr24_full_2_c;
    2460         811 :             *yuv2packed1 = yuv2bgr24_full_1_c;
    2461         811 :             break;
    2462           6 :         case AV_PIX_FMT_RGB48LE:
    2463           6 :             *yuv2packedX = yuv2rgb48le_full_X_c;
    2464           6 :             *yuv2packed2 = yuv2rgb48le_full_2_c;
    2465           6 :             *yuv2packed1 = yuv2rgb48le_full_1_c;
    2466           6 :             break;
    2467           3 :         case AV_PIX_FMT_BGR48LE:
    2468           3 :             *yuv2packedX = yuv2bgr48le_full_X_c;
    2469           3 :             *yuv2packed2 = yuv2bgr48le_full_2_c;
    2470           3 :             *yuv2packed1 = yuv2bgr48le_full_1_c;
    2471           3 :             break;
    2472           6 :         case AV_PIX_FMT_RGB48BE:
    2473           6 :             *yuv2packedX = yuv2rgb48be_full_X_c;
    2474           6 :             *yuv2packed2 = yuv2rgb48be_full_2_c;
    2475           6 :             *yuv2packed1 = yuv2rgb48be_full_1_c;
    2476           6 :             break;
    2477           3 :         case AV_PIX_FMT_BGR48BE:
    2478           3 :             *yuv2packedX = yuv2bgr48be_full_X_c;
    2479           3 :             *yuv2packed2 = yuv2bgr48be_full_2_c;
    2480           3 :             *yuv2packed1 = yuv2bgr48be_full_1_c;
    2481           3 :             break;
    2482         350 :         case AV_PIX_FMT_BGR4_BYTE:
    2483         350 :             *yuv2packedX = yuv2bgr4_byte_full_X_c;
    2484         350 :             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
    2485         350 :             *yuv2packed1 = yuv2bgr4_byte_full_1_c;
    2486         350 :             break;
    2487         350 :         case AV_PIX_FMT_RGB4_BYTE:
    2488         350 :             *yuv2packedX = yuv2rgb4_byte_full_X_c;
    2489         350 :             *yuv2packed2 = yuv2rgb4_byte_full_2_c;
    2490         350 :             *yuv2packed1 = yuv2rgb4_byte_full_1_c;
    2491         350 :             break;
    2492         700 :         case AV_PIX_FMT_BGR8:
    2493         700 :             *yuv2packedX = yuv2bgr8_full_X_c;
    2494         700 :             *yuv2packed2 = yuv2bgr8_full_2_c;
    2495         700 :             *yuv2packed1 = yuv2bgr8_full_1_c;
    2496         700 :             break;
    2497         350 :         case AV_PIX_FMT_RGB8:
    2498         350 :             *yuv2packedX = yuv2rgb8_full_X_c;
    2499         350 :             *yuv2packed2 = yuv2rgb8_full_2_c;
    2500         350 :             *yuv2packed1 = yuv2rgb8_full_1_c;
    2501         350 :             break;
    2502        1590 :         case AV_PIX_FMT_GBRP:
    2503             :         case AV_PIX_FMT_GBRP9BE:
    2504             :         case AV_PIX_FMT_GBRP9LE:
    2505             :         case AV_PIX_FMT_GBRP10BE:
    2506             :         case AV_PIX_FMT_GBRP10LE:
    2507             :         case AV_PIX_FMT_GBRP12BE:
    2508             :         case AV_PIX_FMT_GBRP12LE:
    2509             :         case AV_PIX_FMT_GBRP14BE:
    2510             :         case AV_PIX_FMT_GBRP14LE:
    2511             :         case AV_PIX_FMT_GBRAP:
    2512             :         case AV_PIX_FMT_GBRAP10BE:
    2513             :         case AV_PIX_FMT_GBRAP10LE:
    2514             :         case AV_PIX_FMT_GBRAP12BE:
    2515             :         case AV_PIX_FMT_GBRAP12LE:
    2516        1590 :             *yuv2anyX = yuv2gbrp_full_X_c;
    2517        1590 :             break;
    2518         412 :         case AV_PIX_FMT_GBRP16BE:
    2519             :         case AV_PIX_FMT_GBRP16LE:
    2520             :         case AV_PIX_FMT_GBRAP16BE:
    2521             :         case AV_PIX_FMT_GBRAP16LE:
    2522         412 :             *yuv2anyX = yuv2gbrp16_full_X_c;
    2523         412 :             break;
    2524             :         }
    2525        5810 :         if (!*yuv2packedX && !*yuv2anyX)
    2526           0 :             goto YUV_PACKED;
    2527             :     } else {
    2528      117864 :         YUV_PACKED:
    2529       58932 :         switch (dstFormat) {
    2530          71 :         case AV_PIX_FMT_RGBA64LE:
    2531             : #if CONFIG_SWSCALE_ALPHA
    2532          71 :             if (c->needAlpha) {
    2533           0 :                 *yuv2packed1 = yuv2rgba64le_1_c;
    2534           0 :                 *yuv2packed2 = yuv2rgba64le_2_c;
    2535           0 :                 *yuv2packedX = yuv2rgba64le_X_c;
    2536             :             } else
    2537             : #endif /* CONFIG_SWSCALE_ALPHA */
    2538             :             {
    2539          71 :                 *yuv2packed1 = yuv2rgbx64le_1_c;
    2540          71 :                 *yuv2packed2 = yuv2rgbx64le_2_c;
    2541          71 :                 *yuv2packedX = yuv2rgbx64le_X_c;
    2542             :             }
    2543          71 :             break;
    2544          41 :         case AV_PIX_FMT_RGBA64BE:
    2545             : #if CONFIG_SWSCALE_ALPHA
    2546          41 :             if (c->needAlpha) {
    2547           0 :                 *yuv2packed1 = yuv2rgba64be_1_c;
    2548           0 :                 *yuv2packed2 = yuv2rgba64be_2_c;
    2549           0 :                 *yuv2packedX = yuv2rgba64be_X_c;
    2550             :             } else
    2551             : #endif /* CONFIG_SWSCALE_ALPHA */
    2552             :             {
    2553          41 :                 *yuv2packed1 = yuv2rgbx64be_1_c;
    2554          41 :                 *yuv2packed2 = yuv2rgbx64be_2_c;
    2555          41 :                 *yuv2packedX = yuv2rgbx64be_X_c;
    2556             :             }
    2557          41 :             break;
    2558          41 :         case AV_PIX_FMT_BGRA64LE:
    2559             : #if CONFIG_SWSCALE_ALPHA
    2560          41 :             if (c->needAlpha) {
    2561           0 :                 *yuv2packed1 = yuv2bgra64le_1_c;
    2562           0 :                 *yuv2packed2 = yuv2bgra64le_2_c;
    2563           0 :                 *yuv2packedX = yuv2bgra64le_X_c;
    2564             :             } else
    2565             : #endif /* CONFIG_SWSCALE_ALPHA */
    2566             :             {
    2567          41 :                 *yuv2packed1 = yuv2bgrx64le_1_c;
    2568          41 :                 *yuv2packed2 = yuv2bgrx64le_2_c;
    2569          41 :                 *yuv2packedX = yuv2bgrx64le_X_c;
    2570             :             }
    2571          41 :             break;
    2572          41 :         case AV_PIX_FMT_BGRA64BE:
    2573             : #if CONFIG_SWSCALE_ALPHA
    2574          41 :             if (c->needAlpha) {
    2575           0 :                 *yuv2packed1 = yuv2bgra64be_1_c;
    2576           0 :                 *yuv2packed2 = yuv2bgra64be_2_c;
    2577           0 :                 *yuv2packedX = yuv2bgra64be_X_c;
    2578             :             } else
    2579             : #endif /* CONFIG_SWSCALE_ALPHA */
    2580             :             {
    2581          41 :                 *yuv2packed1 = yuv2bgrx64be_1_c;
    2582          41 :                 *yuv2packed2 = yuv2bgrx64be_2_c;
    2583          41 :                 *yuv2packedX = yuv2bgrx64be_X_c;
    2584             :             }
    2585          41 :             break;
    2586         950 :         case AV_PIX_FMT_RGB48LE:
    2587         950 :             *yuv2packed1 = yuv2rgb48le_1_c;
    2588         950 :             *yuv2packed2 = yuv2rgb48le_2_c;
    2589         950 :             *yuv2packedX = yuv2rgb48le_X_c;
    2590         950 :             break;
    2591         136 :         case AV_PIX_FMT_RGB48BE:
    2592         136 :             *yuv2packed1 = yuv2rgb48be_1_c;
    2593         136 :             *yuv2packed2 = yuv2rgb48be_2_c;
    2594         136 :             *yuv2packedX = yuv2rgb48be_X_c;
    2595         136 :             break;
    2596          41 :         case AV_PIX_FMT_BGR48LE:
    2597          41 :             *yuv2packed1 = yuv2bgr48le_1_c;
    2598          41 :             *yuv2packed2 = yuv2bgr48le_2_c;
    2599          41 :             *yuv2packedX = yuv2bgr48le_X_c;
    2600          41 :             break;
    2601          41 :         case AV_PIX_FMT_BGR48BE:
    2602          41 :             *yuv2packed1 = yuv2bgr48be_1_c;
    2603          41 :             *yuv2packed2 = yuv2bgr48be_2_c;
    2604          41 :             *yuv2packedX = yuv2bgr48be_X_c;
    2605          41 :             break;
    2606        1233 :         case AV_PIX_FMT_RGB32:
    2607             :         case AV_PIX_FMT_BGR32:
    2608             : #if CONFIG_SMALL
    2609             :             *yuv2packed1 = yuv2rgb32_1_c;
    2610             :             *yuv2packed2 = yuv2rgb32_2_c;
    2611             :             *yuv2packedX = yuv2rgb32_X_c;
    2612             : #else
    2613             : #if CONFIG_SWSCALE_ALPHA
    2614        1233 :                 if (c->needAlpha) {
    2615           0 :                     *yuv2packed1 = yuv2rgba32_1_c;
    2616           0 :                     *yuv2packed2 = yuv2rgba32_2_c;
    2617           0 :                     *yuv2packedX = yuv2rgba32_X_c;
    2618             :                 } else
    2619             : #endif /* CONFIG_SWSCALE_ALPHA */
    2620             :                 {
    2621        1233 :                     *yuv2packed1 = yuv2rgbx32_1_c;
    2622        1233 :                     *yuv2packed2 = yuv2rgbx32_2_c;
    2623        1233 :                     *yuv2packedX = yuv2rgbx32_X_c;
    2624             :                 }
    2625             : #endif /* !CONFIG_SMALL */
    2626        1233 :             break;
    2627         313 :         case AV_PIX_FMT_RGB32_1:
    2628             :         case AV_PIX_FMT_BGR32_1:
    2629             : #if CONFIG_SMALL
    2630             :                 *yuv2packed1 = yuv2rgb32_1_1_c;
    2631             :                 *yuv2packed2 = yuv2rgb32_1_2_c;
    2632             :                 *yuv2packedX = yuv2rgb32_1_X_c;
    2633             : #else
    2634             : #if CONFIG_SWSCALE_ALPHA
    2635         313 :                 if (c->needAlpha) {
    2636           0 :                     *yuv2packed1 = yuv2rgba32_1_1_c;
    2637           0 :                     *yuv2packed2 = yuv2rgba32_1_2_c;
    2638           0 :                     *yuv2packedX = yuv2rgba32_1_X_c;
    2639             :                 } else
    2640             : #endif /* CONFIG_SWSCALE_ALPHA */
    2641             :                 {
    2642         313 :                     *yuv2packed1 = yuv2rgbx32_1_1_c;
    2643         313 :                     *yuv2packed2 = yuv2rgbx32_1_2_c;
    2644         313 :                     *yuv2packedX = yuv2rgbx32_1_X_c;
    2645             :                 }
    2646             : #endif /* !CONFIG_SMALL */
    2647         313 :                 break;
    2648        5781 :         case AV_PIX_FMT_RGB24:
    2649        5781 :             *yuv2packed1 = yuv2rgb24_1_c;
    2650        5781 :             *yuv2packed2 = yuv2rgb24_2_c;
    2651        5781 :             *yuv2packedX = yuv2rgb24_X_c;
    2652        5781 :             break;
    2653        1351 :         case AV_PIX_FMT_BGR24:
    2654        1351 :             *yuv2packed1 = yuv2bgr24_1_c;
    2655        1351 :             *yuv2packed2 = yuv2bgr24_2_c;
    2656        1351 :             *yuv2packedX = yuv2bgr24_X_c;
    2657        1351 :             break;
    2658         624 :         case AV_PIX_FMT_RGB565LE:
    2659             :         case AV_PIX_FMT_RGB565BE:
    2660             :         case AV_PIX_FMT_BGR565LE:
    2661             :         case AV_PIX_FMT_BGR565BE:
    2662         624 :             *yuv2packed1 = yuv2rgb16_1_c;
    2663         624 :             *yuv2packed2 = yuv2rgb16_2_c;
    2664         624 :             *yuv2packedX = yuv2rgb16_X_c;
    2665         624 :             break;
    2666        1331 :         case AV_PIX_FMT_RGB555LE:
    2667             :         case AV_PIX_FMT_RGB555BE:
    2668             :         case AV_PIX_FMT_BGR555LE:
    2669             :         case AV_PIX_FMT_BGR555BE:
    2670        1331 :             *yuv2packed1 = yuv2rgb15_1_c;
    2671        1331 :             *yuv2packed2 = yuv2rgb15_2_c;
    2672        1331 :             *yuv2packedX = yuv2rgb15_X_c;
    2673        1331 :             break;
    2674         176 :         case AV_PIX_FMT_RGB444LE:
    2675             :         case AV_PIX_FMT_RGB444BE:
    2676             :         case AV_PIX_FMT_BGR444LE:
    2677             :         case AV_PIX_FMT_BGR444BE:
    2678         176 :             *yuv2packed1 = yuv2rgb12_1_c;
    2679         176 :             *yuv2packed2 = yuv2rgb12_2_c;
    2680         176 :             *yuv2packedX = yuv2rgb12_X_c;
    2681         176 :             break;
    2682         183 :         case AV_PIX_FMT_RGB8:
    2683             :         case AV_PIX_FMT_BGR8:
    2684         183 :             *yuv2packed1 = yuv2rgb8_1_c;
    2685         183 :             *yuv2packed2 = yuv2rgb8_2_c;
    2686         183 :             *yuv2packedX = yuv2rgb8_X_c;
    2687         183 :             break;
    2688           0 :         case AV_PIX_FMT_RGB4:
    2689             :         case AV_PIX_FMT_BGR4:
    2690           0 :             *yuv2packed1 = yuv2rgb4_1_c;
    2691           0 :             *yuv2packed2 = yuv2rgb4_2_c;
    2692           0 :             *yuv2packedX = yuv2rgb4_X_c;
    2693           0 :             break;
    2694         109 :         case AV_PIX_FMT_RGB4_BYTE:
    2695             :         case AV_PIX_FMT_BGR4_BYTE:
    2696         109 :             *yuv2packed1 = yuv2rgb4b_1_c;
    2697         109 :             *yuv2packed2 = yuv2rgb4b_2_c;
    2698         109 :             *yuv2packedX = yuv2rgb4b_X_c;
    2699         109 :             break;
    2700             :         }
    2701             :     }
    2702       64742 :     switch (dstFormat) {
    2703         952 :     case AV_PIX_FMT_MONOWHITE:
    2704         952 :         *yuv2packed1 = yuv2monowhite_1_c;
    2705         952 :         *yuv2packed2 = yuv2monowhite_2_c;
    2706         952 :         *yuv2packedX = yuv2monowhite_X_c;
    2707         952 :         break;
    2708          64 :     case AV_PIX_FMT_MONOBLACK:
    2709          64 :         *yuv2packed1 = yuv2monoblack_1_c;
    2710          64 :         *yuv2packed2 = yuv2monoblack_2_c;
    2711          64 :         *yuv2packedX = yuv2monoblack_X_c;
    2712          64 :         break;
    2713          94 :     case AV_PIX_FMT_YUYV422:
    2714          94 :         *yuv2packed1 = yuv2yuyv422_1_c;
    2715          94 :         *yuv2packed2 = yuv2yuyv422_2_c;
    2716          94 :         *yuv2packedX = yuv2yuyv422_X_c;
    2717          94 :         break;
    2718          35 :     case AV_PIX_FMT_YVYU422:
    2719          35 :         *yuv2packed1 = yuv2yvyu422_1_c;
    2720          35 :         *yuv2packed2 = yuv2yvyu422_2_c;
    2721          35 :         *yuv2packedX = yuv2yvyu422_X_c;
    2722          35 :         break;
    2723         338 :     case AV_PIX_FMT_UYVY422:
    2724         338 :         *yuv2packed1 = yuv2uyvy422_1_c;
    2725         338 :         *yuv2packed2 = yuv2uyvy422_2_c;
    2726         338 :         *yuv2packedX = yuv2uyvy422_X_c;
    2727         338 :         break;
    2728          47 :     case AV_PIX_FMT_YA8:
    2729          47 :         *yuv2packed1 = yuv2ya8_1_c;
    2730          47 :         *yuv2packed2 = yuv2ya8_2_c;
    2731          47 :         *yuv2packedX = yuv2ya8_X_c;
    2732          47 :         break;
    2733          44 :     case AV_PIX_FMT_AYUV64LE:
    2734          44 :         *yuv2packedX = yuv2ayuv64le_X_c;
    2735          44 :         break;
    2736             :     }
    2737       64742 : }

Generated by: LCOV version 1.13