LCOV - code coverage report
Current view: top level - libswscale - output.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 987 1486 66.4 %
Date: 2017-10-24 00:14:43 Functions: 103 226 45.6 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : #include <math.h>
      22             : #include <stdint.h>
      23             : #include <stdio.h>
      24             : #include <string.h>
      25             : 
      26             : #include "libavutil/attributes.h"
      27             : #include "libavutil/avutil.h"
      28             : #include "libavutil/avassert.h"
      29             : #include "libavutil/bswap.h"
      30             : #include "libavutil/cpu.h"
      31             : #include "libavutil/intreadwrite.h"
      32             : #include "libavutil/mathematics.h"
      33             : #include "libavutil/pixdesc.h"
      34             : #include "config.h"
      35             : #include "rgb2rgb.h"
      36             : #include "swscale.h"
      37             : #include "swscale_internal.h"
      38             : 
      39             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
      40             : {  1,   3,   1,   3,   1,   3,   1,   3, },
      41             : {  2,   0,   2,   0,   2,   0,   2,   0, },
      42             : {  1,   3,   1,   3,   1,   3,   1,   3, },
      43             : };
      44             : 
      45             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
      46             : {  6,   2,   6,   2,   6,   2,   6,   2, },
      47             : {  0,   4,   0,   4,   0,   4,   0,   4, },
      48             : {  6,   2,   6,   2,   6,   2,   6,   2, },
      49             : };
      50             : 
      51             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
      52             : {  8,   4,  11,   7,   8,   4,  11,   7, },
      53             : {  2,  14,   1,  13,   2,  14,   1,  13, },
      54             : { 10,   6,   9,   5,  10,   6,   9,   5, },
      55             : {  0,  12,   3,  15,   0,  12,   3,  15, },
      56             : {  8,   4,  11,   7,   8,   4,  11,   7, },
      57             : };
      58             : 
      59             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
      60             : { 17,   9,  23,  15,  16,   8,  22,  14, },
      61             : {  5,  29,   3,  27,   4,  28,   2,  26, },
      62             : { 21,  13,  19,  11,  20,  12,  18,  10, },
      63             : {  0,  24,   6,  30,   1,  25,   7,  31, },
      64             : { 16,   8,  22,  14,  17,   9,  23,  15, },
      65             : {  4,  28,   2,  26,   5,  29,   3,  27, },
      66             : { 20,  12,  18,  10,  21,  13,  19,  11, },
      67             : {  1,  25,   7,  31,   0,  24,   6,  30, },
      68             : { 17,   9,  23,  15,  16,   8,  22,  14, },
      69             : };
      70             : 
      71             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
      72             : {  0,  55,  14,  68,   3,  58,  17,  72, },
      73             : { 37,  18,  50,  32,  40,  22,  54,  35, },
      74             : {  9,  64,   5,  59,  13,  67,   8,  63, },
      75             : { 46,  27,  41,  23,  49,  31,  44,  26, },
      76             : {  2,  57,  16,  71,   1,  56,  15,  70, },
      77             : { 39,  21,  52,  34,  38,  19,  51,  33, },
      78             : { 11,  66,   7,  62,  10,  65,   6,  60, },
      79             : { 48,  30,  43,  25,  47,  29,  42,  24, },
      80             : {  0,  55,  14,  68,   3,  58,  17,  72, },
      81             : };
      82             : 
      83             : #if 1
      84             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
      85             : {117,  62, 158, 103, 113,  58, 155, 100, },
      86             : { 34, 199,  21, 186,  31, 196,  17, 182, },
      87             : {144,  89, 131,  76, 141,  86, 127,  72, },
      88             : {  0, 165,  41, 206,  10, 175,  52, 217, },
      89             : {110,  55, 151,  96, 120,  65, 162, 107, },
      90             : { 28, 193,  14, 179,  38, 203,  24, 189, },
      91             : {138,  83, 124,  69, 148,  93, 134,  79, },
      92             : {  7, 172,  48, 213,   3, 168,  45, 210, },
      93             : {117,  62, 158, 103, 113,  58, 155, 100, },
      94             : };
      95             : #elif 1
      96             : // tries to correct a gamma of 1.5
      97             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
      98             : {  0, 143,  18, 200,   2, 156,  25, 215, },
      99             : { 78,  28, 125,  64,  89,  36, 138,  74, },
     100             : { 10, 180,   3, 161,  16, 195,   8, 175, },
     101             : {109,  51,  93,  38, 121,  60, 105,  47, },
     102             : {  1, 152,  23, 210,   0, 147,  20, 205, },
     103             : { 85,  33, 134,  71,  81,  30, 130,  67, },
     104             : { 14, 190,   6, 171,  12, 185,   5, 166, },
     105             : {117,  57, 101,  44, 113,  54,  97,  41, },
     106             : {  0, 143,  18, 200,   2, 156,  25, 215, },
     107             : };
     108             : #elif 1
     109             : // tries to correct a gamma of 2.0
     110             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
     111             : {  0, 124,   8, 193,   0, 140,  12, 213, },
     112             : { 55,  14, 104,  42,  66,  19, 119,  52, },
     113             : {  3, 168,   1, 145,   6, 187,   3, 162, },
     114             : { 86,  31,  70,  21,  99,  39,  82,  28, },
     115             : {  0, 134,  11, 206,   0, 129,   9, 200, },
     116             : { 62,  17, 114,  48,  58,  16, 109,  45, },
     117             : {  5, 181,   2, 157,   4, 175,   1, 151, },
     118             : { 95,  36,  78,  26,  90,  34,  74,  24, },
     119             : {  0, 124,   8, 193,   0, 140,  12, 213, },
     120             : };
     121             : #else
     122             : // tries to correct a gamma of 2.5
     123             : DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
     124             : {  0, 107,   3, 187,   0, 125,   6, 212, },
     125             : { 39,   7,  86,  28,  49,  11, 102,  36, },
     126             : {  1, 158,   0, 131,   3, 180,   1, 151, },
     127             : { 68,  19,  52,  12,  81,  25,  64,  17, },
     128             : {  0, 119,   5, 203,   0, 113,   4, 195, },
     129             : { 45,   9,  96,  33,  42,   8,  91,  30, },
     130             : {  2, 172,   1, 144,   2, 165,   0, 137, },
     131             : { 77,  23,  60,  15,  72,  21,  56,  14, },
     132             : {  0, 107,   3, 187,   0, 125,   6, 212, },
     133             : };
     134             : #endif
     135             : 
     136             : #define output_pixel(pos, val, bias, signedness) \
     137             :     if (big_endian) { \
     138             :         AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
     139             :     } else { \
     140             :         AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
     141             :     }
     142             : 
     143             : static av_always_inline void
     144      247460 : yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
     145             :                          int big_endian, int output_bits)
     146             : {
     147             :     int i;
     148      247460 :     int shift = 3;
     149      247460 :     av_assert0(output_bits == 16);
     150             : 
     151    85140580 :     for (i = 0; i < dstW; i++) {
     152    84893120 :         int val = src[i] + (1 << (shift - 1));
     153    84893120 :         output_pixel(&dest[i], val, 0, uint);
     154             :     }
     155      247460 : }
     156             : 
     157             : static av_always_inline void
     158      168680 : yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
     159             :                          const int32_t **src, uint16_t *dest, int dstW,
     160             :                          int big_endian, int output_bits)
     161             : {
     162             :     int i;
     163      168680 :     int shift = 15;
     164      168680 :     av_assert0(output_bits == 16);
     165             : 
     166    51246776 :     for (i = 0; i < dstW; i++) {
     167    51078096 :         int val = 1 << (shift - 1);
     168             :         int j;
     169             : 
     170             :         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
     171             :          * filters (or anything with negative coeffs, the range can be slightly
     172             :          * wider in both directions. To account for this overflow, we subtract
     173             :          * a constant so it always fits in the signed range (assuming a
     174             :          * reasonable filterSize), and re-add that at the end. */
     175    51078096 :         val -= 0x40000000;
     176   260830480 :         for (j = 0; j < filterSize; j++)
     177   209752384 :             val += src[j][i] * (unsigned)filter[j];
     178             : 
     179    51078096 :         output_pixel(&dest[i], val, 0x8000, int);
     180             :     }
     181      168680 : }
     182             : 
     183             : #undef output_pixel
     184             : 
     185             : #define output_pixel(pos, val) \
     186             :     if (big_endian) { \
     187             :         AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
     188             :     } else { \
     189             :         AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
     190             :     }
     191             : 
     192             : static av_always_inline void
     193      632316 : yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
     194             :                          int big_endian, int output_bits)
     195             : {
     196             :     int i;
     197      632316 :     int shift = 15 - output_bits;
     198             : 
     199   195378348 :     for (i = 0; i < dstW; i++) {
     200   194746032 :         int val = src[i] + (1 << (shift - 1));
     201   194746032 :         output_pixel(&dest[i], val);
     202             :     }
     203      632316 : }
     204             : 
     205             : static av_always_inline void
     206     1167368 : yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
     207             :                          const int16_t **src, uint16_t *dest, int dstW,
     208             :                          int big_endian, int output_bits)
     209             : {
     210             :     int i;
     211     1167368 :     int shift = 11 + 16 - output_bits;
     212             : 
     213   385150760 :     for (i = 0; i < dstW; i++) {
     214   383983392 :         int val = 1 << (shift - 1);
     215             :         int j;
     216             : 
     217  1936556960 :         for (j = 0; j < filterSize; j++)
     218  1552573568 :             val += src[j][i] * filter[j];
     219             : 
     220   383983392 :         output_pixel(&dest[i], val);
     221             :     }
     222     1167368 : }
     223             : 
     224             : #undef output_pixel
     225             : 
     226             : #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
     227             : static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
     228             :                               uint8_t *dest, int dstW, \
     229             :                               const uint8_t *dither, int offset)\
     230             : { \
     231             :     yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
     232             :                          (uint16_t *) dest, dstW, is_be, bits); \
     233             : }\
     234             : static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
     235             :                               const int16_t **src, uint8_t *dest, int dstW, \
     236             :                               const uint8_t *dither, int offset)\
     237             : { \
     238             :     yuv2planeX_## template_size ## _c_template(filter, \
     239             :                          filterSize, (const typeX_t **) src, \
     240             :                          (uint16_t *) dest, dstW, is_be, bits); \
     241             : }
     242       57872 : yuv2NBPS( 9, BE, 1, 10, int16_t)
     243       65072 : yuv2NBPS( 9, LE, 0, 10, int16_t)
     244       66136 : yuv2NBPS(10, BE, 1, 10, int16_t)
     245     1425364 : yuv2NBPS(10, LE, 0, 10, int16_t)
     246       39116 : yuv2NBPS(12, BE, 1, 10, int16_t)
     247       89228 : yuv2NBPS(12, LE, 0, 10, int16_t)
     248       26720 : yuv2NBPS(14, BE, 1, 10, int16_t)
     249       30176 : yuv2NBPS(14, LE, 0, 10, int16_t)
     250       79940 : yuv2NBPS(16, BE, 1, 16, int32_t)
     251      336200 : yuv2NBPS(16, LE, 0, 16, int32_t)
     252             : 
     253     9779454 : static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
     254             :                            const int16_t **src, uint8_t *dest, int dstW,
     255             :                            const uint8_t *dither, int offset)
     256             : {
     257             :     int i;
     258  6246191568 :     for (i=0; i<dstW; i++) {
     259  6236412114 :         int val = dither[(i + offset) & 7] << 12;
     260             :         int j;
     261 25579491927 :         for (j=0; j<filterSize; j++)
     262 19343079813 :             val += src[j][i] * filter[j];
     263             : 
     264  6236412114 :         dest[i]= av_clip_uint8(val>>19);
     265             :     }
     266     9779454 : }
     267             : 
     268     5170349 : static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
     269             :                            const uint8_t *dither, int offset)
     270             : {
     271             :     int i;
     272  1739643830 :     for (i=0; i<dstW; i++) {
     273  1734473481 :         int val = (src[i] + dither[(i + offset) & 7]) >> 7;
     274  1734473481 :         dest[i]= av_clip_uint8(val);
     275             :     }
     276     5170349 : }
     277             : 
     278         100 : static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
     279             :                         const int16_t **chrUSrc, const int16_t **chrVSrc,
     280             :                         uint8_t *dest, int chrDstW)
     281             : {
     282         100 :     enum AVPixelFormat dstFormat = c->dstFormat;
     283         100 :     const uint8_t *chrDither = c->chrDither8;
     284             :     int i;
     285             : 
     286         100 :     if (dstFormat == AV_PIX_FMT_NV12)
     287        5050 :         for (i=0; i<chrDstW; i++) {
     288        5000 :             int u = chrDither[i & 7] << 12;
     289        5000 :             int v = chrDither[(i + 3) & 7] << 12;
     290             :             int j;
     291       65000 :             for (j=0; j<chrFilterSize; j++) {
     292       60000 :                 u += chrUSrc[j][i] * chrFilter[j];
     293       60000 :                 v += chrVSrc[j][i] * chrFilter[j];
     294             :             }
     295             : 
     296        5000 :             dest[2*i]= av_clip_uint8(u>>19);
     297        5000 :             dest[2*i+1]= av_clip_uint8(v>>19);
     298             :         }
     299             :     else
     300        5050 :         for (i=0; i<chrDstW; i++) {
     301        5000 :             int u = chrDither[i & 7] << 12;
     302        5000 :             int v = chrDither[(i + 3) & 7] << 12;
     303             :             int j;
     304       65000 :             for (j=0; j<chrFilterSize; j++) {
     305       60000 :                 u += chrUSrc[j][i] * chrFilter[j];
     306       60000 :                 v += chrVSrc[j][i] * chrFilter[j];
     307             :             }
     308             : 
     309        5000 :             dest[2*i]= av_clip_uint8(v>>19);
     310        5000 :             dest[2*i+1]= av_clip_uint8(u>>19);
     311             :         }
     312         100 : }
     313             : 
     314             : 
     315             : #define output_pixel(pos, val) \
     316             :     if (big_endian) { \
     317             :         AV_WB16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
     318             :     } else { \
     319             :         AV_WL16(pos, av_clip_uintp2(val >> shift, 10) << 6); \
     320             :     }
     321             : 
     322        3744 : static void yuv2p010l1_c(const int16_t *src,
     323             :                          uint16_t *dest, int dstW,
     324             :                          int big_endian)
     325             : {
     326             :     int i;
     327        3744 :     int shift = 5;
     328             : 
     329     1321632 :     for (i = 0; i < dstW; i++) {
     330     1317888 :         int val = src[i] + (1 << (shift - 1));
     331     1317888 :         output_pixel(&dest[i], val);
     332             :     }
     333        3744 : }
     334             : 
     335         200 : static void yuv2p010lX_c(const int16_t *filter, int filterSize,
     336             :                          const int16_t **src, uint16_t *dest, int dstW,
     337             :                          int big_endian)
     338             : {
     339             :     int i, j;
     340         200 :     int shift = 17;
     341             : 
     342       40200 :     for (i = 0; i < dstW; i++) {
     343       40000 :         int val = 1 << (shift - 1);
     344             : 
     345      520000 :         for (j = 0; j < filterSize; j++)
     346      480000 :             val += src[j][i] * filter[j];
     347             : 
     348       40000 :         output_pixel(&dest[i], val);
     349             :     }
     350         200 : }
     351             : 
     352        1972 : static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
     353             :                          const int16_t **chrUSrc, const int16_t **chrVSrc,
     354             :                          uint8_t *dest8, int chrDstW)
     355             : {
     356        1972 :     uint16_t *dest = (uint16_t*)dest8;
     357        1972 :     int shift = 17;
     358        1972 :     int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
     359             :     int i, j;
     360             : 
     361      341444 :     for (i = 0; i < chrDstW; i++) {
     362      339472 :         int u = 1 << (shift - 1);
     363      339472 :         int v = 1 << (shift - 1);
     364             : 
     365      788944 :         for (j = 0; j < chrFilterSize; j++) {
     366      449472 :             u += chrUSrc[j][i] * chrFilter[j];
     367      449472 :             v += chrVSrc[j][i] * chrFilter[j];
     368             :         }
     369             : 
     370      339472 :         output_pixel(&dest[2*i]  , u);
     371      339472 :         output_pixel(&dest[2*i+1], v);
     372             :     }
     373        1972 : }
     374             : 
     375           0 : static void yuv2p010l1_LE_c(const int16_t *src,
     376             :                             uint8_t *dest, int dstW,
     377             :                             const uint8_t *dither, int offset)
     378             : {
     379           0 :     yuv2p010l1_c(src, (uint16_t*)dest, dstW, 0);
     380           0 : }
     381             : 
     382        3744 : static void yuv2p010l1_BE_c(const int16_t *src,
     383             :                             uint8_t *dest, int dstW,
     384             :                             const uint8_t *dither, int offset)
     385             : {
     386        3744 :     yuv2p010l1_c(src, (uint16_t*)dest, dstW, 1);
     387        3744 : }
     388             : 
     389         100 : static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
     390             :                             const int16_t **src, uint8_t *dest, int dstW,
     391             :                             const uint8_t *dither, int offset)
     392             : {
     393         100 :     yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 0);
     394         100 : }
     395             : 
     396         100 : static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
     397             :                             const int16_t **src, uint8_t *dest, int dstW,
     398             :                             const uint8_t *dither, int offset)
     399             : {
     400         100 :     yuv2p010lX_c(filter, filterSize, src, (uint16_t*)dest, dstW, 1);
     401         100 : }
     402             : 
     403             : #undef output_pixel
     404             : 
     405             : 
     406             : #define accumulate_bit(acc, val) \
     407             :     acc <<= 1; \
     408             :     acc |= (val) >= 234
     409             : #define output_pixel(pos, acc) \
     410             :     if (target == AV_PIX_FMT_MONOBLACK) { \
     411             :         pos = acc; \
     412             :     } else { \
     413             :         pos = ~acc; \
     414             :     }
     415             : 
     416             : static av_always_inline void
     417       71308 : yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
     418             :                       const int16_t **lumSrc, int lumFilterSize,
     419             :                       const int16_t *chrFilter, const int16_t **chrUSrc,
     420             :                       const int16_t **chrVSrc, int chrFilterSize,
     421             :                       const int16_t **alpSrc, uint8_t *dest, int dstW,
     422             :                       int y, enum AVPixelFormat target)
     423             : {
     424       71308 :     const uint8_t * const d128 = ff_dither_8x8_220[y&7];
     425             :     int i;
     426       71308 :     unsigned acc = 0;
     427       71308 :     int err = 0;
     428             : 
     429    12336016 :     for (i = 0; i < dstW; i += 2) {
     430             :         int j;
     431    12264708 :         int Y1 = 1 << 18;
     432    12264708 :         int Y2 = 1 << 18;
     433             : 
     434    24749416 :         for (j = 0; j < lumFilterSize; j++) {
     435    12484708 :             Y1 += lumSrc[j][i]   * lumFilter[j];
     436    12484708 :             Y2 += lumSrc[j][i+1] * lumFilter[j];
     437             :         }
     438    12264708 :         Y1 >>= 19;
     439    12264708 :         Y2 >>= 19;
     440    12264708 :         if ((Y1 | Y2) & 0x100) {
     441         376 :             Y1 = av_clip_uint8(Y1);
     442         376 :             Y2 = av_clip_uint8(Y2);
     443             :         }
     444    12264708 :         if (c->dither == SWS_DITHER_ED) {
     445           0 :             Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     446           0 :             c->dither_error[0][i] = err;
     447           0 :             acc = 2*acc + (Y1 >= 128);
     448           0 :             Y1 -= 220*(acc&1);
     449             : 
     450           0 :             err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
     451           0 :             c->dither_error[0][i+1] = Y1;
     452           0 :             acc = 2*acc + (err >= 128);
     453           0 :             err -= 220*(acc&1);
     454             :         } else {
     455    12264708 :             accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
     456    12264708 :             accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
     457             :         }
     458    12264708 :         if ((i & 7) == 6) {
     459     3065752 :             output_pixel(*dest++, acc);
     460             :         }
     461             :     }
     462       71308 :     c->dither_error[0][i] = err;
     463             : 
     464       71308 :     if (i & 6) {
     465        1700 :         output_pixel(*dest, acc);
     466             :     }
     467       71308 : }
     468             : 
     469             : static av_always_inline void
     470           0 : yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
     471             :                       const int16_t *ubuf[2], const int16_t *vbuf[2],
     472             :                       const int16_t *abuf[2], uint8_t *dest, int dstW,
     473             :                       int yalpha, int uvalpha, int y,
     474             :                       enum AVPixelFormat target)
     475             : {
     476           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
     477           0 :     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
     478           0 :     int  yalpha1 = 4096 - yalpha;
     479             :     int i;
     480             :     av_assert2(yalpha  <= 4096U);
     481             : 
     482           0 :     if (c->dither == SWS_DITHER_ED) {
     483           0 :         int err = 0;
     484           0 :         int acc = 0;
     485           0 :         for (i = 0; i < dstW; i +=2) {
     486             :             int Y;
     487             : 
     488           0 :             Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
     489           0 :             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     490           0 :             c->dither_error[0][i] = err;
     491           0 :             acc = 2*acc + (Y >= 128);
     492           0 :             Y -= 220*(acc&1);
     493             : 
     494           0 :             err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
     495           0 :             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
     496           0 :             c->dither_error[0][i+1] = Y;
     497           0 :             acc = 2*acc + (err >= 128);
     498           0 :             err -= 220*(acc&1);
     499             : 
     500           0 :             if ((i & 7) == 6)
     501           0 :                 output_pixel(*dest++, acc);
     502             :         }
     503           0 :         c->dither_error[0][i] = err;
     504             :     } else {
     505           0 :     for (i = 0; i < dstW; i += 8) {
     506           0 :         int Y, acc = 0;
     507             : 
     508           0 :         Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
     509           0 :         accumulate_bit(acc, Y + d128[0]);
     510           0 :         Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
     511           0 :         accumulate_bit(acc, Y + d128[1]);
     512           0 :         Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
     513           0 :         accumulate_bit(acc, Y + d128[2]);
     514           0 :         Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
     515           0 :         accumulate_bit(acc, Y + d128[3]);
     516           0 :         Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
     517           0 :         accumulate_bit(acc, Y + d128[4]);
     518           0 :         Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
     519           0 :         accumulate_bit(acc, Y + d128[5]);
     520           0 :         Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
     521           0 :         accumulate_bit(acc, Y + d128[6]);
     522           0 :         Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
     523           0 :         accumulate_bit(acc, Y + d128[7]);
     524             : 
     525           0 :         output_pixel(*dest++, acc);
     526             :     }
     527             :     }
     528           0 : }
     529             : 
     530             : static av_always_inline void
     531        9600 : yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
     532             :                       const int16_t *ubuf[2], const int16_t *vbuf[2],
     533             :                       const int16_t *abuf0, uint8_t *dest, int dstW,
     534             :                       int uvalpha, int y, enum AVPixelFormat target)
     535             : {
     536        9600 :     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
     537             :     int i;
     538             : 
     539        9600 :     if (c->dither == SWS_DITHER_ED) {
     540           0 :         int err = 0;
     541           0 :         int acc = 0;
     542           0 :         for (i = 0; i < dstW; i +=2) {
     543             :             int Y;
     544             : 
     545           0 :             Y = ((buf0[i + 0] + 64) >> 7);
     546           0 :             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
     547           0 :             c->dither_error[0][i] = err;
     548           0 :             acc = 2*acc + (Y >= 128);
     549           0 :             Y -= 220*(acc&1);
     550             : 
     551           0 :             err = ((buf0[i + 1] + 64) >> 7);
     552           0 :             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
     553           0 :             c->dither_error[0][i+1] = Y;
     554           0 :             acc = 2*acc + (err >= 128);
     555           0 :             err -= 220*(acc&1);
     556             : 
     557           0 :             if ((i & 7) == 6)
     558           0 :                 output_pixel(*dest++, acc);
     559             :         }
     560           0 :         c->dither_error[0][i] = err;
     561             :     } else {
     562       67200 :     for (i = 0; i < dstW; i += 8) {
     563       57600 :         int acc = 0;
     564       57600 :         accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
     565       57600 :         accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
     566       57600 :         accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
     567       57600 :         accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
     568       57600 :         accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
     569       57600 :         accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
     570       57600 :         accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
     571       57600 :         accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
     572             : 
     573       57600 :         output_pixel(*dest++, acc);
     574             :     }
     575             :     }
     576        9600 : }
     577             : 
     578             : #undef output_pixel
     579             : #undef accumulate_bit
     580             : 
     /*
      * YUV2PACKEDWRAPPER(name, base, ext, fmt) defines three static functions:
      *   name##ext##_X_c, name##ext##_2_c and name##ext##_1_c.
      * Each one forwards its arguments unchanged to the matching
      *   name##base##_X_c_template / _2_c_template / _1_c_template
      * and appends the pixel-format constant `fmt` as the final argument.
      */
     581             : #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
     582             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     583             :                                 const int16_t **lumSrc, int lumFilterSize, \
     584             :                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
     585             :                                 const int16_t **chrVSrc, int chrFilterSize, \
     586             :                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
     587             :                                 int y) \
     588             : { \
     589             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
     590             :                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
     591             :                                   alpSrc, dest, dstW, y, fmt); \
     592             : } \
     593             :  \
     594             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
     595             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
     596             :                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
     597             :                                 int yalpha, int uvalpha, int y) \
     598             : { \
     599             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
     600             :                                   dest, dstW, yalpha, uvalpha, y, fmt); \
     601             : } \
     602             :  \
     603             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
     604             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
     605             :                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
     606             :                                 int uvalpha, int y) \
     607             : { \
     608             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
     609             :                                   abuf0, dest, dstW, uvalpha, \
     610             :                                   y, fmt); \
     611             : }
     612             : 
     /*
      * `base` is empty here, so these expand to yuv2monowhite_{X,2,1}_c and
      * yuv2monoblack_{X,2,1}_c, all calling yuv2mono_{X,2,1}_c_template with
      * AV_PIX_FMT_MONOWHITE and AV_PIX_FMT_MONOBLACK respectively.
      */
     613       73320 : YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE)
     614        7588 : YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)
     615             : 
     /*
      * output_pixels(pos, Y1, U, Y2, V) stores four values into
      * dest[pos .. pos + 3]; the order depends on `target`:
      *   AV_PIX_FMT_YUYV422 -> Y1 U  Y2 V
      *   AV_PIX_FMT_YVYU422 -> Y1 V  Y2 U
      *   anything else      -> U  Y1 V  Y2   (expected to be UYVY422)
      * `dest` and `target` are taken from the scope of the expansion site.
      * NOTE(review): the expansion is a bare if/else chain, not wrapped in
      * do { } while (0), so it must not be used as the body of an unbraced
      * if/else.
      */
     616             : #define output_pixels(pos, Y1, U, Y2, V) \
     617             :     if (target == AV_PIX_FMT_YUYV422) { \
     618             :         dest[pos + 0] = Y1; \
     619             :         dest[pos + 1] = U;  \
     620             :         dest[pos + 2] = Y2; \
     621             :         dest[pos + 3] = V;  \
     622             :     } else if (target == AV_PIX_FMT_YVYU422) { \
     623             :         dest[pos + 0] = Y1; \
     624             :         dest[pos + 1] = V;  \
     625             :         dest[pos + 2] = Y2; \
     626             :         dest[pos + 3] = U;  \
     627             :     } else { /* AV_PIX_FMT_UYVY422 */ \
     628             :         dest[pos + 0] = U;  \
     629             :         dest[pos + 1] = Y1; \
     630             :         dest[pos + 2] = V;  \
     631             :         dest[pos + 3] = Y2; \
     632             :     }
     633             : 
     /*
      * Multi-tap filter writing packed YUYV/YVYU/UYVY bytes to `dest`.
      *
      * Each iteration i (0 <= i < (dstW + 1) >> 1) produces two luma values
      * and one U/V pair:
      *   - Y1/Y2 sum lumSrc[j][2*i] and lumSrc[j][2*i + 1], weighted by
      *     lumFilter[j], over lumFilterSize taps;
      *   - U/V sum chrUSrc[j][i] and chrVSrc[j][i], weighted by chrFilter[j],
      *     over chrFilterSize taps.
      * The accumulators start at 1 << 18, which is half of the 1 << 19
      * divisor applied by the later >> 19, so the shift rounds to nearest.
      * The four results go to dest[4*i .. 4*i + 3] through output_pixels(),
      * in the byte order selected by `target`.
      *
      * c, alpSrc and y are not read in this function.
      */
     634             : static av_always_inline void
     635       18732 : yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
     636             :                      const int16_t **lumSrc, int lumFilterSize,
     637             :                      const int16_t *chrFilter, const int16_t **chrUSrc,
     638             :                      const int16_t **chrVSrc, int chrFilterSize,
     639             :                      const int16_t **alpSrc, uint8_t *dest, int dstW,
     640             :                      int y, enum AVPixelFormat target)
     641             : {
     642             :     int i;
     643             : 
     644     3292764 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     645             :         int j;
     646     3274032 :         int Y1 = 1 << 18;
     647     3274032 :         int Y2 = 1 << 18;
     648     3274032 :         int U  = 1 << 18;
     649     3274032 :         int V  = 1 << 18;
     650             : 
     651     6878064 :         for (j = 0; j < lumFilterSize; j++) {
     652     3604032 :             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
     653     3604032 :             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
     654             :         }
     655    16610160 :         for (j = 0; j < chrFilterSize; j++) {
     656    13336128 :             U += chrUSrc[j][i] * chrFilter[j];
     657    13336128 :             V += chrVSrc[j][i] * chrFilter[j];
     658             :         }
     659     3274032 :         Y1 >>= 19;
     660     3274032 :         Y2 >>= 19;
     661     3274032 :         U  >>= 19;
     662     3274032 :         V  >>= 19;
     /* Cheap combined range test: clip only when some value has bit 8 set.
      * NOTE(review): bit 8 flags every out-of-range value only while the
      * results stay within [-256, 511]; presumably the coefficient scaling
      * guarantees that -- confirm. */
     663     3274032 :         if ((Y1 | Y2 | U | V) & 0x100) {
     664          17 :             Y1 = av_clip_uint8(Y1);
     665          17 :             Y2 = av_clip_uint8(Y2);
     666          17 :             U  = av_clip_uint8(U);
     667          17 :             V  = av_clip_uint8(V);
     668             :         }
     669     3274032 :         output_pixels(4*i, Y1, U, Y2, V);
     670             :     }
     671       18732 : }
     672             : 
     /*
      * Two-line blend writing packed YUYV/YVYU/UYVY bytes to `dest`.
      *
      * Luma is buf[0] * (4096 - yalpha) + buf[1] * yalpha.
      * Chroma is ubuf/vbuf[0] * (4096 - uvalpha) + ubuf/vbuf[1] * uvalpha.
      * Each sum is shifted right by 19 with no rounding term added.
      * The av_assert2() calls check that both weights are <= 4096.
      * Each iteration i (0 <= i < (dstW + 1) >> 1) writes
      * dest[4*i .. 4*i + 3] through output_pixels().
      *
      * c, abuf and y are not read in this function.
      */
     673             : static av_always_inline void
     674           0 : yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
     675             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
     676             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
     677             :                      int yalpha, int uvalpha, int y,
     678             :                      enum AVPixelFormat target)
     679             : {
     680           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
     681           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
     682           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
     683           0 :     int  yalpha1 = 4096 - yalpha;
     684           0 :     int uvalpha1 = 4096 - uvalpha;
     685             :     int i;
     686             :     av_assert2(yalpha  <= 4096U);
     687             :     av_assert2(uvalpha <= 4096U);
     688             : 
     689           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     690           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
     691           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
     692           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
     693           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
     694             : 
     /* Clip only when some value has bit 8 set (cheap combined range test). */
     695           0 :         if ((Y1 | Y2 | U | V) & 0x100) {
     696           0 :             Y1 = av_clip_uint8(Y1);
     697           0 :             Y2 = av_clip_uint8(Y2);
     698           0 :             U  = av_clip_uint8(U);
     699           0 :             V  = av_clip_uint8(V);
     700             :         }
     701             : 
     702           0 :         output_pixels(i * 4, Y1, U, Y2, V);
     703             :     }
     704           0 : }
     705             : 
     /*
      * Single-luma-line variant writing packed YUYV/YVYU/UYVY bytes to `dest`.
      *
      * Luma is (buf0[x] + 64) >> 7.
      * Chroma depends on uvalpha:
      *   uvalpha <  2048: (ubuf[0][i] + 64) >> 7, and likewise for vbuf[0];
      *   uvalpha >= 2048: (ubuf[0][i] + ubuf[1][i] + 128) >> 8, and likewise
      *                    for vbuf, i.e. the rounded sum of both chroma lines.
      * Each iteration i (0 <= i < (dstW + 1) >> 1) writes
      * dest[4*i .. 4*i + 3] through output_pixels().
      *
      * c, abuf0 and y are not read in this function.
      *
      * NOTE(review): in both branches the conditional clip is followed by an
      * unconditional av_clip_uint8() of the same four values. The `& 0x100`
      * block is therefore redundant.
      */
     706             : static av_always_inline void
     707       86400 : yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
     708             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
     709             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
     710             :                      int uvalpha, int y, enum AVPixelFormat target)
     711             : {
     712       86400 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     713             :     int i;
     714             : 
     715       86400 :     if (uvalpha < 2048) {
     716    31190400 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     717    31104000 :             int Y1 = (buf0[i * 2    ]+64) >> 7;
     718    31104000 :             int Y2 = (buf0[i * 2 + 1]+64) >> 7;
     719    31104000 :             int U  = (ubuf0[i]       +64) >> 7;
     720    31104000 :             int V  = (vbuf0[i]       +64) >> 7;
     721             : 
     722    31104000 :             if ((Y1 | Y2 | U | V) & 0x100) {
     723           0 :                 Y1 = av_clip_uint8(Y1);
     724           0 :                 Y2 = av_clip_uint8(Y2);
     725           0 :                 U  = av_clip_uint8(U);
     726           0 :                 V  = av_clip_uint8(V);
     727             :             }
     728             : 
     729    31104000 :             Y1 = av_clip_uint8(Y1);
     730    31104000 :             Y2 = av_clip_uint8(Y2);
     731    31104000 :             U  = av_clip_uint8(U);
     732    31104000 :             V  = av_clip_uint8(V);
     733             : 
     734    31104000 :             output_pixels(i * 4, Y1, U, Y2, V);
     735             :         }
     736             :     } else {
     737           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
     738           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     739           0 :             int Y1 = (buf0[i * 2    ]    + 64) >> 7;
     740           0 :             int Y2 = (buf0[i * 2 + 1]    + 64) >> 7;
     741           0 :             int U  = (ubuf0[i] + ubuf1[i]+128) >> 8;
     742           0 :             int V  = (vbuf0[i] + vbuf1[i]+128) >> 8;
     743             : 
     744           0 :             if ((Y1 | Y2 | U | V) & 0x100) {
     745           0 :                 Y1 = av_clip_uint8(Y1);
     746           0 :                 Y2 = av_clip_uint8(Y2);
     747           0 :                 U  = av_clip_uint8(U);
     748           0 :                 V  = av_clip_uint8(V);
     749             :             }
     750             : 
     751           0 :             Y1 = av_clip_uint8(Y1);
     752           0 :             Y2 = av_clip_uint8(Y2);
     753           0 :             U  = av_clip_uint8(U);
     754           0 :             V  = av_clip_uint8(V);
     755             : 
     756           0 :             output_pixels(i * 4, Y1, U, Y2, V);
     757             :         }
     758             :     }
     759       86400 : }
     760             : 
     761             : #undef output_pixels
     762             : 
     /*
      * Instantiate the _X_c / _2_c / _1_c wrappers for the three packed 4:2:2
      * layouts: yuv2yuyv422_*, yuv2yvyu422_* and yuv2uyvy422_*. All of them
      * call yuv2422_{X,2,1}_c_template with the corresponding pixel format.
      */
     763       11620 : YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
     764        3556 : YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
     765       89956 : YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
     766             : 
     /*
      * Channel-order helpers for the 16-bit-per-component writers below.
      * R_B evaluates to R when `target` is RGB48LE/BE or RGBA64LE/BE, and to
      * B for any other target. B_R is the mirror image. Writing R_B, G, B_R
      * in sequence therefore yields R,G,B for those four formats and B,G,R
      * otherwise. R, B and `target` come from the scope of the expansion site.
      *
      * output_pixel(pos, val) stores `val` at `pos` with AV_WB16 when
      * isBE(target) is true and with AV_WL16 otherwise (presumably 16-bit
      * big-/little-endian stores -- defined outside this view).
      * NOTE(review): the expansion is a bare if/else, not wrapped in
      * do { } while (0).
      */
     767             : #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
     768             : #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
     769             : #define output_pixel(pos, val) \
     770             :     if (isBE(target)) { \
     771             :         AV_WB16(pos, val); \
     772             :     } else { \
     773             :         AV_WL16(pos, val); \
     774             :     }
     775             : 
     /*
      * Multi-tap filter converting YUV(A) input held in int32_t samples to
      * 16-bit-per-component RGB(A), two output pixels per iteration.
      *
      * For each i in [0, (dstW + 1) >> 1):
      *   - Y1/Y2 start at -0x40000000 and accumulate lumSrc[j][2*i] and
      *     lumSrc[j][2*i + 1] times lumFilter[j]; U/V start at -(128 << 23)
      *     and accumulate chrUSrc/chrVSrc[j][i] times chrFilter[j]. The
      *     (unsigned) cast makes the multiply-accumulate unsigned arithmetic.
      *   - If hasAlpha, A1/A2 are accumulated from alpSrc with lumFilter,
      *     halved and offset by 0x20002000. Otherwise they keep the value
      *     0xffff << 14 set before the loop.
      *   - Y is shifted down by 14 and re-biased by 0x10000. It is then
      *     offset and scaled with c->yuv2rgb_y_offset / c->yuv2rgb_y_coeff,
      *     and 1 << 13 is added before the final >> 14.
      *   - R, G and B hold the chroma contributions, built from the
      *     c->yuv2rgb_* coefficients.
      *   - Each component is av_clip_uintp2(x, 30) >> 14, written through
      *     output_pixel() in the order R_B, G, B_R.
      * When `eightbytes` is set, alpha is written after each pixel and dest
      * advances by 8 uint16_t per iteration; otherwise it advances by 6.
      *
      * y is not read in this function.
      */
     776             : static av_always_inline void
     777      104804 : yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
     778             :                        const int32_t **lumSrc, int lumFilterSize,
     779             :                        const int16_t *chrFilter, const int32_t **chrUSrc,
     780             :                        const int32_t **chrVSrc, int chrFilterSize,
     781             :                        const int32_t **alpSrc, uint16_t *dest, int dstW,
     782             :                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
     783             : {
     784             :     int i;
     785      104804 :     int A1 = 0xffff<<14, A2 = 0xffff<<14;
     786             : 
     787    18280008 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     788             :         int j;
     789    18175204 :         int Y1 = -0x40000000;
     790    18175204 :         int Y2 = -0x40000000;
     791    18175204 :         int U  = -(128 << 23); // 19
     792    18175204 :         int V  = -(128 << 23);
     793             :         int R, G, B;
     794             : 
     795    36350408 :         for (j = 0; j < lumFilterSize; j++) {
     796    18175204 :             Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
     797    18175204 :             Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
     798             :         }
     799    90876020 :         for (j = 0; j < chrFilterSize; j++) {; /* NOTE(review): stray ';' after '{' is an empty statement */
     800    72700816 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
     801    72700816 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
     802             :         }
     803             : 
     804    18175204 :         if (hasAlpha) {
     805           0 :             A1 = -0x40000000;
     806           0 :             A2 = -0x40000000;
     807           0 :             for (j = 0; j < lumFilterSize; j++) {
     808           0 :                 A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
     809           0 :                 A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
     810             :             }
     811           0 :             A1 >>= 1;
     812           0 :             A1 += 0x20002000;
     813           0 :             A2 >>= 1;
     814           0 :             A2 += 0x20002000;
     815             :         }
     816             : 
     817             :         // 8 bits: 12+15=27; 16 bits: 12+19=31
     818    18175204 :         Y1 >>= 14; // 10
     819    18175204 :         Y1 += 0x10000;
     820    18175204 :         Y2 >>= 14;
     821    18175204 :         Y2 += 0x10000;
     822    18175204 :         U  >>= 14;
     823    18175204 :         V  >>= 14;
     824             : 
     825             :         // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
     826    18175204 :         Y1 -= c->yuv2rgb_y_offset;
     827    18175204 :         Y2 -= c->yuv2rgb_y_offset;
     828    18175204 :         Y1 *= c->yuv2rgb_y_coeff;
     829    18175204 :         Y2 *= c->yuv2rgb_y_coeff;
     830    18175204 :         Y1 += 1 << 13; // 21
     831    18175204 :         Y2 += 1 << 13;
     832             :         // 8 bits: 17 + 13 bits = 30 bits, 16 bits: 17 + 13 bits = 30 bits
     833             : 
     834    18175204 :         R = V * c->yuv2rgb_v2r_coeff;
     835    18175204 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     836    18175204 :         B =                            U * c->yuv2rgb_u2b_coeff;
     837             : 
     838             :         // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
     839    18175204 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     840    18175204 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     841    18175204 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     842    18175204 :         if (eightbytes) {
     843     3548160 :             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     844     3548160 :             output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     845     3548160 :             output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     846     3548160 :             output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     847     3548160 :             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     848     3548160 :             dest += 8;
     849             :         } else {
     850    14627044 :             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
     851    14627044 :             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
     852    14627044 :             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
     853    14627044 :             dest += 6;
     854             :         }
     855             :     }
     856      104804 : }
     857             : 
     /*
      * Two-line blend converting YUV(A) input held in int32_t samples to
      * 16-bit-per-component RGB(A), two output pixels per iteration.
      *
      * Luma is buf[0] * (4096 - yalpha) + buf[1] * yalpha, shifted right
      * by 14. Chroma is blended the same way with uvalpha, has (128 << 23)
      * subtracted, and is then shifted right by 14.
      * If hasAlpha, A1/A2 are the yalpha-weighted blend of abuf[0]/abuf[1],
      * shifted right by 1 with 1 << 13 added. Otherwise they stay at
      * 0xffff << 14. abuf0/abuf1 are NULL when !hasAlpha and are only
      * dereferenced under `if (hasAlpha)`.
      * Y is offset and scaled with c->yuv2rgb_y_offset / c->yuv2rgb_y_coeff,
      * and 1 << 13 is added. Each output component is
      * av_clip_uintp2(x, 30) >> 14, written through output_pixel() in the
      * order R_B, G, B_R.
      * dest advances by 8 uint16_t per iteration when `eightbytes` is set
      * (alpha written after each pixel), otherwise by 6.
      * The av_assert2() calls check that yalpha and uvalpha are <= 4096.
      *
      * y is not read in this function.
      */
     858             : static av_always_inline void
     859           0 : yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
     860             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
     861             :                        const int32_t *abuf[2], uint16_t *dest, int dstW,
     862             :                        int yalpha, int uvalpha, int y,
     863             :                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
     864             : {
     865           0 :     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
     866           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
     867           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
     868           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
     869           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
     870           0 :     int  yalpha1 = 4096 - yalpha;
     871           0 :     int uvalpha1 = 4096 - uvalpha;
     872             :     int i;
     873           0 :     int A1 = 0xffff<<14, A2 = 0xffff<<14;
     874             : 
     875             :     av_assert2(yalpha  <= 4096U);
     876             :     av_assert2(uvalpha <= 4096U);
     877             : 
     878           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
     879           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
     880           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
     881           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha - (128 << 23)) >> 14;
     882           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha - (128 << 23)) >> 14;
     883             :         int R, G, B;
     884             : 
     885           0 :         Y1 -= c->yuv2rgb_y_offset;
     886           0 :         Y2 -= c->yuv2rgb_y_offset;
     887           0 :         Y1 *= c->yuv2rgb_y_coeff;
     888           0 :         Y2 *= c->yuv2rgb_y_coeff;
     889           0 :         Y1 += 1 << 13;
     890           0 :         Y2 += 1 << 13;
     891             : 
     892           0 :         R = V * c->yuv2rgb_v2r_coeff;
     893           0 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     894           0 :         B =                            U * c->yuv2rgb_u2b_coeff;
     895             : 
     896           0 :         if (hasAlpha) {
     897           0 :             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 1;
     898           0 :             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;
     899             : 
     900           0 :             A1 += 1 << 13;
     901           0 :             A2 += 1 << 13;
     902             :         }
     903             : 
     904           0 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     905           0 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     906           0 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     907           0 :         if (eightbytes) {
     908           0 :             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     909           0 :             output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     910           0 :             output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     911           0 :             output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     912           0 :             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     913           0 :             dest += 8;
     914             :         } else {
     915           0 :             output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
     916           0 :             output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
     917           0 :             output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
     918           0 :             dest += 6;
     919             :         }
     920             :     }
     921           0 : }
     922             : 
     /*
      * Single-luma-line variant converting YUV(A) input held in int32_t
      * samples to 16-bit-per-component RGB(A), two output pixels per
      * iteration.
      *
      * Luma is buf0[x] >> 2.
      * Chroma depends on uvalpha:
      *   uvalpha <  2048: (ubuf[0][i] - (128 << 11)) >> 2, likewise vbuf[0];
      *   uvalpha >= 2048: (ubuf[0][i] + ubuf[1][i] - (128 << 12)) >> 3,
      *                    likewise vbuf, i.e. both chroma lines are summed.
      * If hasAlpha, A1/A2 are (abuf0[x] << 11) + (1 << 13). Otherwise they
      * stay at 0xffff << 14.
      * Y is offset and scaled with c->yuv2rgb_y_offset / c->yuv2rgb_y_coeff,
      * and 1 << 13 is added. Each output component is
      * av_clip_uintp2(x, 30) >> 14, written through output_pixel() in the
      * order R_B, G, B_R.
      * dest advances by 8 uint16_t per iteration when `eightbytes` is set
      * (alpha written after each pixel), otherwise by 6.
      *
      * y is not read in this function.
      */
     923             : static av_always_inline void
     924       44900 : yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
     925             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
     926             :                        const int32_t *abuf0, uint16_t *dest, int dstW,
     927             :                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
     928             : {
     929       44900 :     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     930             :     int i;
     931       44900 :     int A1 = 0xffff<<14, A2= 0xffff<<14;
     932             : 
     933       44900 :     if (uvalpha < 2048) {
     934     7677000 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     935     7632100 :             int Y1 = (buf0[i * 2]    ) >> 2;
     936     7632100 :             int Y2 = (buf0[i * 2 + 1]) >> 2;
     937     7632100 :             int U  = (ubuf0[i] - (128 << 11)) >> 2;
     938     7632100 :             int V  = (vbuf0[i] - (128 << 11)) >> 2;
     939             :             int R, G, B;
     940             : 
     941     7632100 :             Y1 -= c->yuv2rgb_y_offset;
     942     7632100 :             Y2 -= c->yuv2rgb_y_offset;
     943     7632100 :             Y1 *= c->yuv2rgb_y_coeff;
     944     7632100 :             Y2 *= c->yuv2rgb_y_coeff;
     945     7632100 :             Y1 += 1 << 13;
     946     7632100 :             Y2 += 1 << 13;
     947             : 
     948     7632100 :             if (hasAlpha) {
     949           0 :                 A1 = abuf0[i * 2    ] << 11;
     950           0 :                 A2 = abuf0[i * 2 + 1] << 11;
     951             : 
     952           0 :                 A1 += 1 << 13;
     953           0 :                 A2 += 1 << 13;
     954             :             }
     955             : 
     956     7632100 :             R = V * c->yuv2rgb_v2r_coeff;
     957     7632100 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
     958     7632100 :             B =                            U * c->yuv2rgb_u2b_coeff;
     959             : 
     960     7632100 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
     961     7632100 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
     962     7632100 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
     963     7632100 :             if (eightbytes) {
     964           0 :                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
     965           0 :                 output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
     966           0 :                 output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
     967           0 :                 output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
     968           0 :                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
     969           0 :                 dest += 8;
     970             :             } else {
     971     7632100 :                 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
     972     7632100 :                 output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
     973     7632100 :                 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
     974     7632100 :                 dest += 6;
     975             :             }
     976             :         }
     977             :     } else {
     978           0 :         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
     979           0 :         int A1 = 0xffff<<14, A2 = 0xffff<<14; /* NOTE(review): shadows the function-scope A1/A2 (same initial value) */
     980           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
     981           0 :             int Y1 = (buf0[i * 2]    ) >> 2;
     982           0 :             int Y2 = (buf0[i * 2 + 1]) >> 2;
     983           0 :             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
     984           0 :             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
     985             :             int R, G, B;
     986             : 
     987           0 :             Y1 -= c->yuv2rgb_y_offset;
     988           0 :             Y2 -= c->yuv2rgb_y_offset;
     989           0 :             Y1 *= c->yuv2rgb_y_coeff;
     990           0 :             Y2 *= c->yuv2rgb_y_coeff;
     991           0 :             Y1 += 1 << 13;
     992           0 :             Y2 += 1 << 13;
     993             : 
     994           0 :             if (hasAlpha) {
     995           0 :                 A1 = abuf0[i * 2    ] << 11;
     996           0 :                 A2 = abuf0[i * 2 + 1] << 11;
     997             : 
     998           0 :                 A1 += 1 << 13;
     999           0 :                 A2 += 1 << 13;
    1000             :             }
    1001             : 
    1002           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1003           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1004           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1005             : 
    1006           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
    1007           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
    1008           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
    1009           0 :             if (eightbytes) {
    1010           0 :                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
    1011           0 :                 output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
    1012           0 :                 output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
    1013           0 :                 output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
    1014           0 :                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
    1015           0 :                 dest += 8;
    1016             :             } else {
    1017           0 :                 output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
    1018           0 :                 output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
    1019           0 :                 output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
    1020           0 :                 dest += 6;
    1021             :             }
    1022             :         }
    1023             :     }
    1024       44900 : }
    1025             : 
    /*
     * Multi-tap filter converting YUV(A) input held in int32_t samples to
     * 16-bit-per-component RGB(A), one output pixel per iteration, with one
     * chroma sample per pixel.
     *
     * For each i in [0, dstW):
     *   - Y starts at -0x40000000 and accumulates lumSrc[j][i] times
     *     lumFilter[j]; U/V start at -(128 << 23) and accumulate
     *     chrUSrc/chrVSrc[j][i] times chrFilter[j]. The (unsigned) cast
     *     makes the multiply-accumulate unsigned arithmetic.
     *   - If hasAlpha, A is accumulated from alpSrc with lumFilter, halved
     *     and offset by 0x20002000. Otherwise it keeps the value
     *     0xffff << 14 set before the loop.
     *   - Y is shifted down by 14 and re-biased by 0x10000. It is then
     *     offset and scaled with c->yuv2rgb_y_offset / c->yuv2rgb_y_coeff,
     *     and 1 << 13 is added before the final >> 14.
     *   - Each component is av_clip_uintp2(x, 30) >> 14, written through
     *     output_pixel() in the order R_B, G, B_R.
     * dest advances by 4 uint16_t per pixel when `eightbytes` is set (alpha
     * written at dest[3]), otherwise by 3.
     *
     * y is not read in this function.
     */
    1026             : static av_always_inline void
    1027        1000 : yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1028             :                        const int32_t **lumSrc, int lumFilterSize,
    1029             :                        const int16_t *chrFilter, const int32_t **chrUSrc,
    1030             :                        const int32_t **chrVSrc, int chrFilterSize,
    1031             :                        const int32_t **alpSrc, uint16_t *dest, int dstW,
    1032             :                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1033             : {
    1034             :     int i;
    1035        1000 :     int A = 0xffff<<14;
    1036             : 
    1037      201000 :     for (i = 0; i < dstW; i++) {
    1038             :         int j;
    1039      200000 :         int Y  = -0x40000000;
    1040      200000 :         int U  = -(128 << 23); // 19
    1041      200000 :         int V  = -(128 << 23);
    1042             :         int R, G, B;
    1043             : 
    1044     2600000 :         for (j = 0; j < lumFilterSize; j++) {
    1045     2400000 :             Y += lumSrc[j][i]  * (unsigned)lumFilter[j];
    1046             :         }
    1047     2600000 :         for (j = 0; j < chrFilterSize; j++) {; /* NOTE(review): stray ';' after '{' is an empty statement */
    1048     2400000 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    1049     2400000 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    1050             :         }
    1051             : 
    1052      200000 :         if (hasAlpha) {
    1053       80000 :             A = -0x40000000;
    1054     1040000 :             for (j = 0; j < lumFilterSize; j++) {
    1055      960000 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    1056             :             }
    1057       80000 :             A >>= 1;
    1058       80000 :             A += 0x20002000;
    1059             :         }
    1060             : 
    1061             :         // 8bit: 12+15=27; 16-bit: 12+19=31
    1062      200000 :         Y  >>= 14; // 10
    1063      200000 :         Y += 0x10000;
    1064      200000 :         U  >>= 14;
    1065      200000 :         V  >>= 14;
    1066             : 
    1067             :         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
    1068      200000 :         Y -= c->yuv2rgb_y_offset;
    1069      200000 :         Y *= c->yuv2rgb_y_coeff;
    1070      200000 :         Y += 1 << 13; // 21
    1071             :         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
    1072             : 
    1073      200000 :         R = V * c->yuv2rgb_v2r_coeff;
    1074      200000 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1075      200000 :         B =                            U * c->yuv2rgb_u2b_coeff;
    1076             : 
    1077             :         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
    1078      200000 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1079      200000 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1080      200000 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1081      200000 :         if (eightbytes) {
    1082       80000 :             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1083       80000 :             dest += 4;
    1084             :         } else {
    1085      120000 :             dest += 3;
    1086             :         }
    1087             :     }
    1088        1000 : }
    1089             : /*
                     :  * Blend two input lines and emit dstW packed pixels with 16-bit components.
                     :  *
                     :  * Luma is buf[0][i] * (4096 - yalpha) + buf[1][i] * yalpha; chroma is blended
                     :  * the same way with uvalpha, so both weights are on a 0..4096 scale (see the
                     :  * av_assert2() checks). The blended sample is converted with the yuv2rgb_*
                     :  * offset/coefficients stored in c and written through output_pixel().
                     :  *
                     :  * hasAlpha   non-zero: alpha is blended from abuf[0]/abuf[1];
                     :  *            zero: abuf is never dereferenced and A keeps its initial value.
                     :  * eightbytes non-zero: four components per pixel (dest advances by 4);
                     :  *            zero: three components (dest advances by 3).
                     :  * y          not referenced in this body.
                     :  *
                     :  * output_pixel, R_B and B_R are macros defined outside this excerpt;
                     :  * presumably they use target for byte order and R/B ordering - confirm there.
                     :  *
                     :  * NOTE(review): the weighted sums are evaluated in int; whether they can
                     :  * overflow depends on the input sample range, which is not visible here.
                     :  */
    1090             : static av_always_inline void
    1091           0 : yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
    1092             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
    1093             :                        const int32_t *abuf[2], uint16_t *dest, int dstW,
    1094             :                        int yalpha, int uvalpha, int y,
    1095             :                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1096             : {
    1097           0 :     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
    1098           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1099           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1100           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    1101           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1102           0 :     int  yalpha1 = 4096 - yalpha;
    1103           0 :     int uvalpha1 = 4096 - uvalpha;
    1104             :     int i;
    1105           0 :     int A = 0xffff<<14; /* alpha used when hasAlpha == 0: 0xffff after the final >> 14 */
    1106             : 
    1107             :     av_assert2(yalpha  <= 4096U);
    1108             :     av_assert2(uvalpha <= 4096U);
    1109             : 
    1110           0 :     for (i = 0; i < dstW; i++) {
    1111           0 :         int Y  = (buf0[i]     * yalpha1  + buf1[i]     * yalpha) >> 14;
    1112           0 :         int U  = (ubuf0[i]   * uvalpha1 + ubuf1[i]     * uvalpha - (128 << 23)) >> 14;
    1113           0 :         int V  = (vbuf0[i]   * uvalpha1 + vbuf1[i]     * uvalpha - (128 << 23)) >> 14;
    1114             :         int R, G, B;
    1115             :         /* luma: subtract offset, scale, add 1 << 13 = half of the final 1 << 14 divisor */
    1116           0 :         Y -= c->yuv2rgb_y_offset;
    1117           0 :         Y *= c->yuv2rgb_y_coeff;
    1118           0 :         Y += 1 << 13;
    1119             :         /* chroma contribution to each colour channel (luma is added at output) */
    1120           0 :         R = V * c->yuv2rgb_v2r_coeff;
    1121           0 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1122           0 :         B =                            U * c->yuv2rgb_u2b_coeff;
    1123             : 
    1124           0 :         if (hasAlpha) {
    1125           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;
    1126             : 
    1127           0 :             A += 1 << 13;
    1128             :         }
    1129             :         /* clamp each sum to 30 bits, then >> 14 leaves a 16-bit component */
    1130           0 :         output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1131           0 :         output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1132           0 :         output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1133           0 :         if (eightbytes) {
    1134           0 :             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1135           0 :             dest += 4;
    1136             :         } else {
    1137           0 :             dest += 3;
    1138             :         }
    1139             :     }
    1140           0 : }
    1141             : /*
                     :  * Single-luma-line variant: emit dstW packed pixels with 16-bit components
                     :  * from buf0 without blending luma lines.
                     :  *
                     :  * uvalpha < 2048   chroma is taken from ubuf[0]/vbuf[0] only.
                     :  * otherwise        the two chroma lines are summed; the doubled bias
                     :  *                  (128 << 12) and the larger shift (>> 3) account for the sum.
                     :  *
                     :  * hasAlpha   non-zero: alpha comes from abuf0[i] << 11 plus 1 << 13;
                     :  *            zero: abuf0 is never dereferenced and A keeps its initial value.
                     :  * eightbytes non-zero: four components per pixel (dest advances by 4);
                     :  *            zero: three components (dest advances by 3).
                     :  * y          not referenced in this body.
                     :  *
                     :  * output_pixel, R_B and B_R are macros defined outside this excerpt;
                     :  * presumably they use target for byte order and R/B ordering - confirm there.
                     :  *
                     :  * NOTE(review): abuf0[i] << 11 left-shifts a signed value, which is undefined
                     :  * in C if abuf0[i] can be negative - confirm the input range.
                     :  */
    1142             : static av_always_inline void
    1143           0 : yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
    1144             :                        const int32_t *ubuf[2], const int32_t *vbuf[2],
    1145             :                        const int32_t *abuf0, uint16_t *dest, int dstW,
    1146             :                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
    1147             : {
    1148           0 :     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1149             :     int i;
    1150           0 :     int A = 0xffff<<14; /* alpha used when hasAlpha == 0: 0xffff after the final >> 14 */
    1151             : 
    1152           0 :     if (uvalpha < 2048) {
    1153           0 :         for (i = 0; i < dstW; i++) {
    1154           0 :             int Y  = (buf0[i]) >> 2;
    1155           0 :             int U  = (ubuf0[i] - (128 << 11)) >> 2;
    1156           0 :             int V  = (vbuf0[i] - (128 << 11)) >> 2;
    1157             :             int R, G, B;
    1158             :             /* luma: subtract offset, scale, add 1 << 13 = half of the final 1 << 14 divisor */
    1159           0 :             Y -= c->yuv2rgb_y_offset;
    1160           0 :             Y *= c->yuv2rgb_y_coeff;
    1161           0 :             Y += 1 << 13;
    1162             : 
    1163           0 :             if (hasAlpha) {
    1164           0 :                 A = abuf0[i] << 11;
    1165             : 
    1166           0 :                 A += 1 << 13;
    1167             :             }
    1168             : 
    1169           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1170           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1171           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1172             :             /* clamp each sum to 30 bits, then >> 14 leaves a 16-bit component */
    1173           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1174           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1175           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1176           0 :             if (eightbytes) {
    1177           0 :                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1178           0 :                 dest += 4;
    1179             :             } else {
    1180           0 :                 dest += 3;
    1181             :             }
    1182             :         }
    1183             :     } else {
    1184           0 :         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1185           0 :         int A = 0xffff<<14; /* shadows the outer A; same initial value */
    1186           0 :         for (i = 0; i < dstW; i++) {
    1187           0 :             int Y  = (buf0[i]    ) >> 2;
    1188           0 :             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
    1189           0 :             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
    1190             :             int R, G, B;
    1191             : 
    1192           0 :             Y -= c->yuv2rgb_y_offset;
    1193           0 :             Y *= c->yuv2rgb_y_coeff;
    1194           0 :             Y += 1 << 13;
    1195             : 
    1196           0 :             if (hasAlpha) {
    1197           0 :                 A = abuf0[i] << 11;
    1198             : 
    1199           0 :                 A += 1 << 13;
    1200             :             }
    1201             : 
    1202           0 :             R = V * c->yuv2rgb_v2r_coeff;
    1203           0 :             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1204           0 :             B =                            U * c->yuv2rgb_u2b_coeff;
    1205             : 
    1206           0 :             output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
    1207           0 :             output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
    1208           0 :             output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
    1209           0 :             if (eightbytes) {
    1210           0 :                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
    1211           0 :                 dest += 4;
    1212             :             } else {
    1213           0 :                 dest += 3;
    1214             :             }
    1215             :         }
    1216             :     }
    1217           0 : }
    1218             : 
    1219             : #undef output_pixel
    1220             : #undef r_b
    1221             : #undef b_r
    1222             : /*
                     :  * Define three static functions, name##ext##_X_c, name##ext##_2_c and
                     :  * name##ext##_1_c, that forward to name##base##_X_c_template,
                     :  * name##base##_2_c_template and name##base##_1_c_template.
                     :  *
                     :  * The generated functions take int16_t sample pointers and a uint8_t
                     :  * destination; each wrapper casts the sample pointers to int32_t pointers
                     :  * and the destination to uint16_t * before forwarding, and passes fmt,
                     :  * hasAlpha and eightbytes through as the trailing template arguments.
                     :  *
                     :  * NOTE(review): the int16_t/uint8_t parameter types presumably exist to
                     :  * match a shared function-pointer signature - confirm against the callers.
                     :  */
    1223             : #define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
    1224             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
    1225             :                         const int16_t **_lumSrc, int lumFilterSize, \
    1226             :                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
    1227             :                         const int16_t **_chrVSrc, int chrFilterSize, \
    1228             :                         const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
    1229             :                         int y) \
    1230             : { \
    1231             :     const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
    1232             :                   **chrUSrc = (const int32_t **) _chrUSrc, \
    1233             :                   **chrVSrc = (const int32_t **) _chrVSrc, \
    1234             :                   **alpSrc  = (const int32_t **) _alpSrc; \
    1235             :     uint16_t *dest = (uint16_t *) _dest; \
    1236             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
    1237             :                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
    1238             :                           alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
    1239             : } \
    1240             :  \
    1241             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
    1242             :                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
    1243             :                         const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
    1244             :                         int yalpha, int uvalpha, int y) \
    1245             : { \
    1246             :     const int32_t **buf  = (const int32_t **) _buf, \
    1247             :                   **ubuf = (const int32_t **) _ubuf, \
    1248             :                   **vbuf = (const int32_t **) _vbuf, \
    1249             :                   **abuf = (const int32_t **) _abuf; \
    1250             :     uint16_t *dest = (uint16_t *) _dest; \
    1251             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
    1252             :                           dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
    1253             : } \
    1254             :  \
    1255             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
    1256             :                         const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
    1257             :                         const int16_t *_abuf0, uint8_t *_dest, int dstW, \
    1258             :                         int uvalpha, int y) \
    1259             : { \
    1260             :     const int32_t *buf0  = (const int32_t *)  _buf0, \
    1261             :                  **ubuf  = (const int32_t **) _ubuf, \
    1262             :                  **vbuf  = (const int32_t **) _vbuf, \
    1263             :                   *abuf0 = (const int32_t *)  _abuf0; \
    1264             :     uint16_t *dest = (uint16_t *) _dest; \
    1265             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
    1266             :                                   dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
    1267             : }
    1268             : /*
                     :  * Instantiate the _X_c/_2_c/_1_c wrappers on top of the rgba64 templates.
                     :  * Arguments are (name, base, ext, fmt, hasAlpha, eightbytes):
                     :  *   rgb48  / bgr48   hasAlpha 0, eightbytes 0
                     :  *   rgba64 / bgra64  hasAlpha 1, eightbytes 1
                     :  *   rgbx64 / bgrx64  hasAlpha 0, eightbytes 1 (same fmt as the rgba64/bgra64 entries)
                     :  * The rgba64 templates themselves are outside this excerpt.
                     :  */
    1269       15552 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
    1270      105928 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
    1271        4032 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
    1272        4032 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
    1273           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
    1274           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
    1275        4032 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
    1276        8064 : YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
    1277           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
    1278           0 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
    1279        4032 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
    1280        4032 : YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
    1281             : /* Same twelve variants on top of the rgba64_full templates; ext carries a _full suffix. */
    1282         200 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
    1283         200 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
    1284         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
    1285         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
    1286         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
    1287         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
    1288           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
    1289           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
    1290         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
    1291         100 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
    1292           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
    1293           0 : YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
    1294             : 
    1295             : /*
    1296             :  * Write out 2 RGB pixels in the target pixel format. This function takes a
    1297             :  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
    1298             :  * things like endianness conversion and shifting. The caller takes care of
    1299             :  * setting the correct offset in these tables from the chroma (U/V) values.
    1300             :  * This function then uses the luminance (Y1/Y2) values to write out the
    1301             :  * correct RGB values into the destination buffer.
                     :  *
                     :  * i selects the pixel pair. Most targets write elements 2*i and 2*i+1 of
                     :  * the destination viewed at the target element width; RGB24/BGR24 write
                     :  * bytes 6*i .. 6*i+5; RGB4/BGR4 write the single byte dest[i].
                     :  *
                     :  * A1/A2 are only used by the 32-bit targets when hasAlpha is set. They are
                     :  * shifted left by 24, or by 0 for AV_PIX_FMT_RGB32_1/AV_PIX_FMT_BGR32_1,
                     :  * and added to the LUT sum. The av_assert2() checks expect the LUT sum to
                     :  * carry 0 in that byte when hasAlpha is set and 0xFF when it is not.
                     :  *
                     :  * y selects the row of the ff_dither_* tables for the 16-bit and 8/4-bit
                     :  * targets; it is not used by the 32-bit and 24-bit branches.
    1302             :  */
    1303             : static av_always_inline void
    1304   224943892 : yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
    1305             :               unsigned A1, unsigned A2,
    1306             :               const void *_r, const void *_g, const void *_b, int y,
    1307             :               enum AVPixelFormat target, int hasAlpha)
    1308             : {
    1309   224943892 :     if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
    1310   256732380 :         target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
    1311    35596424 :         uint32_t *dest = (uint32_t *) _dest;
    1312    35596424 :         const uint32_t *r = (const uint32_t *) _r;
    1313    35596424 :         const uint32_t *g = (const uint32_t *) _g;
    1314    35596424 :         const uint32_t *b = (const uint32_t *) _b;
    1315             :         /* CONFIG_SMALL: one branch-free expression instead of separate alpha/no-alpha paths */
    1316             : #if CONFIG_SMALL
    1317             :         int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0;
    1318             : 
    1319             :         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
    1320             :         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
    1321             : #else
    1322    35596424 :         if (hasAlpha) {
    1323           0 :             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
    1324             : 
    1325             :             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0);
    1326           0 :             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
    1327           0 :             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
    1328             :         } else {
    1329             : #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
    1330             :             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
    1331             : 
    1332             :             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
    1333             : #endif
    1334    35596424 :             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
    1335    35596424 :             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
    1336             :         }
    1337             : #endif
    1338   331804588 :     } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
    1339   142457120 :         uint8_t *dest = (uint8_t *) _dest;
    1340   142457120 :         const uint8_t *r = (const uint8_t *) _r;
    1341   142457120 :         const uint8_t *g = (const uint8_t *) _g;
    1342   142457120 :         const uint8_t *b = (const uint8_t *) _b;
    1343             :         /* r_b/b_r pick which LUT feeds the first and third byte: r,b for RGB24, b,r otherwise */
    1344             : #define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
    1345             : #define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)
    1346             : 
    1347   142457120 :         dest[i * 6 + 0] = r_b[Y1];
    1348   142457120 :         dest[i * 6 + 1] =   g[Y1];
    1349   142457120 :         dest[i * 6 + 2] = b_r[Y1];
    1350   142457120 :         dest[i * 6 + 3] = r_b[Y2];
    1351   142457120 :         dest[i * 6 + 4] =   g[Y2];
    1352   142457120 :         dest[i * 6 + 5] = b_r[Y2];
    1353             : #undef r_b
    1354             : #undef b_r
    1355    46890348 :     } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
    1356     8707648 :                target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
    1357    46890348 :                target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
    1358    41061228 :         uint16_t *dest = (uint16_t *) _dest;
    1359    41061228 :         const uint16_t *r = (const uint16_t *) _r;
    1360    41061228 :         const uint16_t *g = (const uint16_t *) _g;
    1361    41061228 :         const uint16_t *b = (const uint16_t *) _b;
    1362             :         int dr1, dg1, db1, dr2, dg2, db2;
    1363             :         /* 565: 2x2 tables indexed by y & 1; 555: 2x2_8 only; 444 (else): 4x4 table indexed by y & 3 */
    1364    41061228 :         if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
    1365    11473700 :             dr1 = ff_dither_2x2_8[ y & 1     ][0];
    1366    11473700 :             dg1 = ff_dither_2x2_4[ y & 1     ][0];
    1367    11473700 :             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
    1368    11473700 :             dr2 = ff_dither_2x2_8[ y & 1     ][1];
    1369    11473700 :             dg2 = ff_dither_2x2_4[ y & 1     ][1];
    1370    11473700 :             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
    1371    29587528 :         } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
    1372    26709000 :             dr1 = ff_dither_2x2_8[ y & 1     ][0];
    1373    26709000 :             dg1 = ff_dither_2x2_8[ y & 1     ][1];
    1374    26709000 :             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
    1375    26709000 :             dr2 = ff_dither_2x2_8[ y & 1     ][1];
    1376    26709000 :             dg2 = ff_dither_2x2_8[ y & 1     ][0];
    1377    26709000 :             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
    1378             :         } else {
    1379     2878528 :             dr1 = ff_dither_4x4_16[ y & 3     ][0];
    1380     2878528 :             dg1 = ff_dither_4x4_16[ y & 3     ][1];
    1381     2878528 :             db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
    1382     2878528 :             dr2 = ff_dither_4x4_16[ y & 3     ][1];
    1383     2878528 :             dg2 = ff_dither_4x4_16[ y & 3     ][0];
    1384     2878528 :             db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
    1385             :         }
    1386             :         /* the dither values offset the LUT index, they are not added to the looked-up result */
    1387    41061228 :         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
    1388    41061228 :         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    1389             :     } else /* 8/4 bits */ {
    1390     5829120 :         uint8_t *dest = (uint8_t *) _dest;
    1391     5829120 :         const uint8_t *r = (const uint8_t *) _r;
    1392     5829120 :         const uint8_t *g = (const uint8_t *) _g;
    1393     5829120 :         const uint8_t *b = (const uint8_t *) _b;
    1394             :         int dr1, dg1, db1, dr2, dg2, db2;
    1395             :         /* 8x8 tables: row from y & 7, column from the pixel position (i * 2 + k) & 7 */
    1396     9580032 :         if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
    1397     3750912 :             const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
    1398     3750912 :             const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
    1399     3750912 :             dr1 = dg1 = d32[(i * 2 + 0) & 7];
    1400     3750912 :             db1 =       d64[(i * 2 + 0) & 7];
    1401     3750912 :             dr2 = dg2 = d32[(i * 2 + 1) & 7];
    1402     3750912 :             db2 =       d64[(i * 2 + 1) & 7];
    1403             :         } else {
    1404     2078208 :             const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
    1405     2078208 :             const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
    1406     2078208 :             dr1 = db1 = d128[(i * 2 + 0) & 7];
    1407     2078208 :             dg1 =        d64[(i * 2 + 0) & 7];
    1408     2078208 :             dr2 = db2 = d128[(i * 2 + 1) & 7];
    1409     2078208 :             dg2 =        d64[(i * 2 + 1) & 7];
    1410             :         }
    1411             :         /* RGB4/BGR4: both pixels share one byte, second pixel in the high nibble */
    1412     5829120 :         if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
    1413           0 :             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
    1414           0 :                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
    1415             :         } else {
    1416     5829120 :             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
    1417     5829120 :             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    1418             :         }
    1419             :     }
    1420   224943892 : }
    1421             : /*
                     :  * Multi-tap vertical filter followed by LUT-based RGB output.
                     :  *
                     :  * Each iteration handles one pixel pair: luma samples 2*i and 2*i+1 are
                     :  * filtered over lumFilterSize input lines with lumFilter, and the single
                     :  * chroma sample i over chrFilterSize lines with chrFilter. Accumulators
                     :  * start at 1 << 18, half of the 1 << 19 divisor applied by the final >> 19.
                     :  * U and V (plus YUVRGB_TABLE_HEADROOM) index the per-context tables to get
                     :  * the r/g/b LUT pointers handed to yuv2rgb_write().
                     :  *
                     :  * hasAlpha non-zero: alpha is filtered from alpSrc with lumFilter and
                     :  * clamped with av_clip_uint8() when bit 8 of A1 | A2 is set; zero: alpSrc
                     :  * is not read and 0 is passed as alpha.
                     :  *
                     :  * NOTE(review): the loop runs (dstW + 1) >> 1 times, so for odd dstW the
                     :  * last iteration reads luma index dstW - presumably the line buffers are
                     :  * padded; confirm against the allocator.
                     :  * NOTE(review): the 0x100 test only detects out-of-range alpha values that
                     :  * have bit 8 set; presumably larger excursions cannot occur - confirm.
                     :  */
    1422             : static av_always_inline void
    1423     1195112 : yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1424             :                      const int16_t **lumSrc, int lumFilterSize,
    1425             :                      const int16_t *chrFilter, const int16_t **chrUSrc,
    1426             :                      const int16_t **chrVSrc, int chrFilterSize,
    1427             :                      const int16_t **alpSrc, uint8_t *dest, int dstW,
    1428             :                      int y, enum AVPixelFormat target, int hasAlpha)
    1429             : {
    1430             :     int i;
    1431             : 
    1432   203242704 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1433             :         int j, A1, A2;
    1434   202047592 :         int Y1 = 1 << 18;
    1435   202047592 :         int Y2 = 1 << 18;
    1436   202047592 :         int U  = 1 << 18;
    1437   202047592 :         int V  = 1 << 18;
    1438             :         const void *r, *g, *b;
    1439             : 
    1440   407791184 :         for (j = 0; j < lumFilterSize; j++) {
    1441   205743592 :             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
    1442   205743592 :             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
    1443             :         }
    1444  1011831560 :         for (j = 0; j < chrFilterSize; j++) {
    1445   809783968 :             U += chrUSrc[j][i] * chrFilter[j];
    1446   809783968 :             V += chrVSrc[j][i] * chrFilter[j];
    1447             :         }
    1448   202047592 :         Y1 >>= 19;
    1449   202047592 :         Y2 >>= 19;
    1450   202047592 :         U  >>= 19;
    1451   202047592 :         V  >>= 19;
    1452   202047592 :         if (hasAlpha) {
    1453           0 :             A1 = 1 << 18;
    1454           0 :             A2 = 1 << 18;
    1455           0 :             for (j = 0; j < lumFilterSize; j++) {
    1456           0 :                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
    1457           0 :                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
    1458             :             }
    1459           0 :             A1 >>= 19;
    1460           0 :             A2 >>= 19;
    1461           0 :             if ((A1 | A2) & 0x100) {
    1462           0 :                 A1 = av_clip_uint8(A1);
    1463           0 :                 A2 = av_clip_uint8(A2);
    1464             :             }
    1465             :         }
    1466             :         /* g is the sum of the U and V table entries (the operands are defined outside this excerpt) */
    1467   202047592 :         r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM];
    1468   202047592 :         g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
    1469   202047592 :         b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1470             :         /* A1/A2 are only initialised when hasAlpha is set, hence the guarded arguments */
    1471   202047592 :         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1472             :                       r, g, b, y, target, hasAlpha);
    1473             :     }
    1474     1195112 : }
    1475             : /*
                     :  * Two-line variant: blend two input lines, then LUT-based RGB output.
                     :  *
                     :  * Each iteration handles one pixel pair. Luma samples 2*i and 2*i+1 are
                     :  * computed as buf[0] * (4096 - yalpha) + buf[1] * yalpha, chroma sample i
                     :  * the same way with uvalpha, and each sum is shifted right by 19. The
                     :  * av_assert2() checks bound both weights by 4096. U and V (plus
                     :  * YUVRGB_TABLE_HEADROOM) select the r/g/b LUT pointers handed to
                     :  * yuv2rgb_write().
                     :  *
                     :  * hasAlpha non-zero: alpha is blended from abuf[0]/abuf[1] with yalpha
                     :  * and always clamped with av_clip_uint8(); zero: abuf is never
                     :  * dereferenced and 0 is passed as alpha.
                     :  *
                     :  * NOTE(review): the loop runs (dstW + 1) >> 1 times, so for odd dstW the
                     :  * last iteration reads luma index dstW - presumably the line buffers are
                     :  * padded; confirm against the allocator.
                     :  */
    1476             : static av_always_inline void
    1477           0 : yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
    1478             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1479             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
    1480             :                      int yalpha, int uvalpha, int y,
    1481             :                      enum AVPixelFormat target, int hasAlpha)
    1482             : {
    1483           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    1484           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1485           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1486           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    1487           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1488           0 :     int  yalpha1 = 4096 - yalpha;
    1489           0 :     int uvalpha1 = 4096 - uvalpha;
    1490             :     int i;
    1491             :     av_assert2(yalpha  <= 4096U);
    1492             :     av_assert2(uvalpha <= 4096U);
    1493             : 
    1494           0 :     for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1495           0 :         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
    1496           0 :         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
    1497           0 :         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
    1498           0 :         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
    1499             :         int A1, A2;
    1500           0 :         const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1501           0 :                    *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1502           0 :                    *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1503             : 
    1504           0 :         if (hasAlpha) {
    1505           0 :             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
    1506           0 :             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
    1507           0 :             A1 = av_clip_uint8(A1);
    1508           0 :             A2 = av_clip_uint8(A2);
    1509             :         }
    1510             :         /* A1/A2 are only initialised when hasAlpha is set, hence the guarded arguments */
    1511           0 :         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1512             :                       r, g, b, y, target, hasAlpha);
    1513             :     }
    1514           0 : }
    1515             : /*
                     :  * Single-luma-line variant with LUT-based RGB output.
                     :  *
                     :  * Each iteration handles one pixel pair; luma is (buf0[k] + 64) >> 7.
                     :  *
                     :  * uvalpha < 2048   chroma comes from ubuf[0]/vbuf[0] only, (x + 64) >> 7.
                     :  * otherwise        the two chroma lines are summed and rounded with
                     :  *                  (a + b + 128) >> 8.
                     :  *
                     :  * hasAlpha non-zero: alpha is derived from abuf0 and clamped with
                     :  * av_clip_uint8(); zero: abuf0 is never dereferenced and 0 is passed as
                     :  * alpha.
                     :  *
                     :  * NOTE(review): the two branches scale alpha differently - the first uses
                     :  * (a * 255 + 16384) >> 15, the second (a + 64) >> 7. Confirm whether the
                     :  * difference is intentional.
                     :  * NOTE(review): the loops run (dstW + 1) >> 1 times, so for odd dstW the
                     :  * last iteration reads luma index dstW - presumably the line buffers are
                     :  * padded; confirm against the allocator.
                     :  */
    1516             : static av_always_inline void
    1517      134700 : yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
    1518             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1519             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
    1520             :                      int uvalpha, int y, enum AVPixelFormat target,
    1521             :                      int hasAlpha)
    1522             : {
    1523      134700 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1524             :     int i;
    1525             : 
    1526      134700 :     if (uvalpha < 2048) {
    1527    23031000 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1528    22896300 :             int Y1 = (buf0[i * 2    ] + 64) >> 7;
    1529    22896300 :             int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
    1530    22896300 :             int U  = (ubuf0[i]        + 64) >> 7;
    1531    22896300 :             int V  = (vbuf0[i]        + 64) >> 7;
    1532             :             int A1, A2;
    1533    22896300 :             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1534    22896300 :                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1535    22896300 :                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1536             :             /* + binds tighter than >>, so the whole sum a * 255 + 16384 is shifted by 15 */
    1537    22896300 :             if (hasAlpha) {
    1538           0 :                 A1 = abuf0[i * 2    ] * 255 + 16384 >> 15;
    1539           0 :                 A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
    1540           0 :                 A1 = av_clip_uint8(A1);
    1541           0 :                 A2 = av_clip_uint8(A2);
    1542             :             }
    1543             : 
    1544    22896300 :             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1545             :                           r, g, b, y, target, hasAlpha);
    1546             :         }
    1547             :     } else {
    1548           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1549           0 :         for (i = 0; i < ((dstW + 1) >> 1); i++) {
    1550           0 :             int Y1 = (buf0[i * 2    ]     +  64) >> 7;
    1551           0 :             int Y2 = (buf0[i * 2 + 1]     +  64) >> 7;
    1552           0 :             int U  = (ubuf0[i] + ubuf1[i] + 128) >> 8;
    1553           0 :             int V  = (vbuf0[i] + vbuf1[i] + 128) >> 8;
    1554             :             int A1, A2;
    1555           0 :             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
    1556           0 :                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
    1557           0 :                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
    1558             : 
    1559           0 :             if (hasAlpha) {
    1560           0 :                 A1 = (abuf0[i * 2    ] + 64) >> 7;
    1561           0 :                 A2 = (abuf0[i * 2 + 1] + 64) >> 7;
    1562           0 :                 A1 = av_clip_uint8(A1);
    1563           0 :                 A2 = av_clip_uint8(A2);
    1564             :             }
    1565             : 
    1566           0 :             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
    1567             :                           r, g, b, y, target, hasAlpha);
    1568             :         }
    1569             :     }
    1570      134700 : }
    1571             : /*
                     :  * Define the static function name##ext##_X_c, which forwards all of its
                     :  * arguments unchanged to name##base##_X_c_template and appends fmt and
                     :  * hasAlpha as the trailing template arguments.
                     :  */
    1572             : #define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
    1573             : static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
    1574             :                                 const int16_t **lumSrc, int lumFilterSize, \
    1575             :                                 const int16_t *chrFilter, const int16_t **chrUSrc, \
    1576             :                                 const int16_t **chrVSrc, int chrFilterSize, \
    1577             :                                 const int16_t **alpSrc, uint8_t *dest, int dstW, \
    1578             :                                 int y) \
    1579             : { \
    1580             :     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
    1581             :                                   chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
    1582             :                                   alpSrc, dest, dstW, y, fmt, hasAlpha); \
    1583             : }
    1584             :
                       /*
                        * Expand YUV2RGBWRAPPERX() with the same arguments and additionally define
                        * the static function <name><ext>_2_c, which forwards its arguments to
                        * <name><base>_2_c_template() followed by fmt and hasAlpha.
                        */
    1585             : #define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
    1586             : YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
    1587             : static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
    1588             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
    1589             :                                 const int16_t *abuf[2], uint8_t *dest, int dstW, \
    1590             :                                 int yalpha, int uvalpha, int y) \
    1591             : { \
    1592             :     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
    1593             :                                   dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
    1594             : }
    1595             :
                       /*
                        * Expand YUV2RGBWRAPPERX2() with the same arguments and additionally define
                        * the static function <name><ext>_1_c, which forwards its arguments to
                        * <name><base>_1_c_template() followed by fmt and hasAlpha.
                        *
                        * One use of this macro therefore yields the _X_c, _2_c and _1_c wrappers.
                        */
    1596             : #define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
    1597             : YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
    1598             : static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
    1599             :                                 const int16_t *ubuf[2], const int16_t *vbuf[2], \
    1600             :                                 const int16_t *abuf0, uint8_t *dest, int dstW, \
    1601             :                                 int uvalpha, int y) \
    1602             : { \
    1603             :     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
    1604             :                                   dstW, uvalpha, y, fmt, hasAlpha); \
    1605             : }
    1606             :
                       /*
                        * Instantiate the _X_c / _2_c / _1_c wrappers around the yuv2rgb_*_c_template
                        * functions for the packed RGB targets.
                        *
                        * With CONFIG_SMALL one 32-bit variant per format is generated and hasAlpha
                        * is the run-time expression CONFIG_SWSCALE_ALPHA && c->needAlpha.
                        * Otherwise hasAlpha is a constant: the a32 variants (1) exist only when
                        * CONFIG_SWSCALE_ALPHA is set, the x32 variants (0) always.
                        *
                        * Note the two spellings: (yuv2rgb,, X) pastes an empty base, while
                        * (yuv2, rgb, X) produces yuv2X_*_c calling the same yuv2rgb_*_c_template.
                        */
    1607             : #if CONFIG_SMALL
    1608             : YUV2RGBWRAPPER(yuv2rgb,,  32_1,  AV_PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1609             : YUV2RGBWRAPPER(yuv2rgb,,  32,    AV_PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1610             : #else
    1611             : #if CONFIG_SWSCALE_ALPHA
    1612           0 : YUV2RGBWRAPPER(yuv2rgb,, a32_1,  AV_PIX_FMT_RGB32_1,   1)
    1613           0 : YUV2RGBWRAPPER(yuv2rgb,, a32,    AV_PIX_FMT_RGB32,     1)
    1614             : #endif
    1615       24336 : YUV2RGBWRAPPER(yuv2rgb,, x32_1,  AV_PIX_FMT_RGB32_1,   0)
    1616      183688 : YUV2RGBWRAPPER(yuv2rgb,, x32,    AV_PIX_FMT_RGB32,     0)
    1617             : #endif
    1618      698548 : YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24,   0)
    1619      151692 : YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24,   0)
    1620       66900 : YUV2RGBWRAPPER(yuv2rgb,,  16,    AV_PIX_FMT_RGB565,    0)
    1621      155000 : YUV2RGBWRAPPER(yuv2rgb,,  15,    AV_PIX_FMT_RGB555,    0)
    1622       16528 : YUV2RGBWRAPPER(yuv2rgb,,  12,    AV_PIX_FMT_RGB444,    0)
    1623       21312 : YUV2RGBWRAPPER(yuv2rgb,,   8,    AV_PIX_FMT_RGB8,      0)
    1624           0 : YUV2RGBWRAPPER(yuv2rgb,,   4,    AV_PIX_FMT_RGB4,      0)
    1625       11808 : YUV2RGBWRAPPER(yuv2rgb,,   4b,   AV_PIX_FMT_RGB4_BYTE, 0)
    1626             :
                       /*
                        * Convert one Y/U/V sample to RGB and store it at dest in the layout
                        * selected by target.
                        *
                        * Y has c->yuv2rgb_y_offset subtracted, is multiplied by
                        * c->yuv2rgb_y_coeff and gets 1 << 21 added (half of the final >> 22
                        * step). U and V are multiplied by the c->yuv2rgb_*_coeff factors and
                        * added to it. The three sums are clipped to 30 bits only when one of
                        * them has bit 30 or 31 set.
                        *
                        * dest     address of the output pixel; not advanced by this function
                        * i        horizontal index, used for c->dither_error[][] and for the
                        *          A_DITHER / X_DITHER patterns
                        * A        alpha value stored when hasAlpha is non-zero, otherwise 255
                        * y        line number, only used by A_DITHER / X_DITHER
                        * err      error carried from one call to the next; only elements 0..2
                        *          are read and updated, and only in the error-diffusion branch
                        *
                        * Targets that are not listed in the switch write nothing.
                        */
    1627   110778895 : static av_always_inline void yuv2rgb_write_full(SwsContext *c,
    1628             :     uint8_t *dest, int i, int Y, int A, int U, int V,
    1629             :     int y, enum AVPixelFormat target, int hasAlpha, int err[4])
    1630             : {
    1631             :     int R, G, B;
    1632   110778895 :     int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;
    1633             : 
    1634   110778895 :     Y -= c->yuv2rgb_y_offset;
    1635   110778895 :     Y *= c->yuv2rgb_y_coeff;
    1636   110778895 :     Y += 1 << 21;
    1637   110778895 :     R = Y + V*c->yuv2rgb_v2r_coeff;
    1638   110778895 :     G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
    1639   110778895 :     B = Y +                          U*c->yuv2rgb_u2b_coeff;
                           /* Fast path: skip the three clips unless some component left [0, 2^30). */
    1640   110778895 :     if ((R | G | B) & 0xC0000000) {
    1641    10039614 :         R = av_clip_uintp2(R, 30);
    1642    10039614 :         G = av_clip_uintp2(G, 30);
    1643    10039614 :         B = av_clip_uintp2(B, 30);
    1644             :     }
    1645             : 
                           /* 30-bit components become 8-bit bytes with >> 22. */
    1646   110778895 :     switch(target) {
    1647       40000 :     case AV_PIX_FMT_ARGB:
    1648       40000 :         dest[0] = hasAlpha ? A : 255;
    1649       40000 :         dest[1] = R >> 22;
    1650       40000 :         dest[2] = G >> 22;
    1651       40000 :         dest[3] = B >> 22;
    1652       40000 :         break;
    1653    15611124 :     case AV_PIX_FMT_RGB24:
    1654    15611124 :         dest[0] = R >> 22;
    1655    15611124 :         dest[1] = G >> 22;
    1656    15611124 :         dest[2] = B >> 22;
    1657    15611124 :         break;
    1658       40000 :     case AV_PIX_FMT_RGBA:
    1659       40000 :         dest[0] = R >> 22;
    1660       40000 :         dest[1] = G >> 22;
    1661       40000 :         dest[2] = B >> 22;
    1662       40000 :         dest[3] = hasAlpha ? A : 255;
    1663       40000 :         break;
    1664       40000 :     case AV_PIX_FMT_ABGR:
    1665       40000 :         dest[0] = hasAlpha ? A : 255;
    1666       40000 :         dest[1] = B >> 22;
    1667       40000 :         dest[2] = G >> 22;
    1668       40000 :         dest[3] = R >> 22;
    1669       40000 :         break;
    1670    30548400 :     case AV_PIX_FMT_BGR24:
    1671    30548400 :         dest[0] = B >> 22;
    1672    30548400 :         dest[1] = G >> 22;
    1673    30548400 :         dest[2] = R >> 22;
    1674    30548400 :         break;
    1675    23667386 :     case AV_PIX_FMT_BGRA:
    1676    23667386 :         dest[0] = B >> 22;
    1677    23667386 :         dest[1] = G >> 22;
    1678    23667386 :         dest[2] = R >> 22;
    1679    23667386 :         dest[3] = hasAlpha ? A : 255;
    1680    23667386 :         break;
                           /*
                            * One-byte formats: the components are reduced to 3/3/2 bits (RGB8, BGR8)
                            * or 1/2/1 bits (RGB4_BYTE, BGR4_BYTE) using the mode in c->dither.
                            */
    1681    40831985 :     case AV_PIX_FMT_BGR4_BYTE:
    1682             :     case AV_PIX_FMT_RGB4_BYTE:
    1683             :     case AV_PIX_FMT_BGR8:
    1684             :     case AV_PIX_FMT_RGB8:
    1685             :     {
    1686             :         int r,g,b;
    1687             : 
    1688    40831985 :         switch (c->dither) {
    1689    40831985 :         default:
    1690             :         case SWS_DITHER_AUTO:
    1691             :         case SWS_DITHER_ED:
                                   /*
                                    * Error diffusion: add 7/16 of the error carried in err[] plus
                                    * 1/16, 5/16 and 3/16 of the errors stored at indices i, i+1 and
                                    * i+2 of c->dither_error[] (the same weights as Floyd-Steinberg;
                                    * the stored values are presumably those of the previous line --
                                    * confirm against the callers).
                                    */
    1692    40831985 :             R >>= 22;
    1693    40831985 :             G >>= 22;
    1694    40831985 :             B >>= 22;
    1695    40831985 :             R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
    1696    40831985 :             G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
    1697    40831985 :             B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
                                   /* Save the incoming error at index i before err[] is overwritten below. */
    1698    40831985 :             c->dither_error[0][i] = err[0];
    1699    40831985 :             c->dither_error[1][i] = err[1];
    1700    40831985 :             c->dither_error[2][i] = err[2];
    1701    40831985 :             r = R >> (isrgb8 ? 5 : 7);
    1702    40831985 :             g = G >> (isrgb8 ? 5 : 6);
    1703    40831985 :             b = B >> (isrgb8 ? 6 : 7);
    1704    40831985 :             r = av_clip(r, 0, isrgb8 ? 7 : 1);
    1705    40831985 :             g = av_clip(g, 0, isrgb8 ? 7 : 3);
    1706    40831985 :             b = av_clip(b, 0, isrgb8 ? 3 : 1);
                                   /* New error: dithered value minus the quantised component scaled by 36, 85 or 255. */
    1707    40831985 :             err[0] = R - r*(isrgb8 ? 36 : 255);
    1708    40831985 :             err[1] = G - g*(isrgb8 ? 36 : 85);
    1709    40831985 :             err[2] = B - b*(isrgb8 ? 85 : 255);
    1710    40831985 :             break;
    1711           0 :         case SWS_DITHER_A_DITHER:
                                   /* Position-dependent pattern from (i, y); the three components use i, i+17 and i+34. */
    1712           0 :             if (isrgb8) {
    1713             :   /* see http://pippin.gimp.org/a_dither/ for details/origin */
    1714             : #define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
    1715           0 :                 r = (((R >> 19) + A_DITHER(i,y)  -96)>>8);
    1716           0 :                 g = (((G >> 19) + A_DITHER(i + 17,y) - 96)>>8);
    1717           0 :                 b = (((B >> 20) + A_DITHER(i + 17*2,y) -96)>>8);
    1718           0 :                 r = av_clip_uintp2(r, 3);
    1719           0 :                 g = av_clip_uintp2(g, 3);
    1720           0 :                 b = av_clip_uintp2(b, 2);
    1721             :             } else {
    1722           0 :                 r = (((R >> 21) + A_DITHER(i,y)-256)>>8);
    1723           0 :                 g = (((G >> 19) + A_DITHER(i + 17,y)-256)>>8);
    1724           0 :                 b = (((B >> 21) + A_DITHER(i + 17*2,y)-256)>>8);
    1725           0 :                 r = av_clip_uintp2(r, 1);
    1726           0 :                 g = av_clip_uintp2(g, 2);
    1727           0 :                 b = av_clip_uintp2(b, 1);
    1728             :             }
    1729           0 :             break;
    1730           0 :         case SWS_DITHER_X_DITHER:
                                   /* Same structure as A_DITHER above, with an XOR-based pattern. */
    1731           0 :             if (isrgb8) {
    1732             :   /* see http://pippin.gimp.org/a_dither/ for details/origin */
    1733             : #define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
    1734           0 :                 r = (((R >> 19) + X_DITHER(i,y) - 96)>>8);
    1735           0 :                 g = (((G >> 19) + X_DITHER(i + 17,y) - 96)>>8);
    1736           0 :                 b = (((B >> 20) + X_DITHER(i + 17*2,y) - 96)>>8);
    1737           0 :                 r = av_clip_uintp2(r, 3);
    1738           0 :                 g = av_clip_uintp2(g, 3);
    1739           0 :                 b = av_clip_uintp2(b, 2);
    1740             :             } else {
    1741           0 :                 r = (((R >> 21) + X_DITHER(i,y)-256)>>8);
    1742           0 :                 g = (((G >> 19) + X_DITHER(i + 17,y)-256)>>8);
    1743           0 :                 b = (((B >> 21) + X_DITHER(i + 17*2,y)-256)>>8);
    1744           0 :                 r = av_clip_uintp2(r, 1);
    1745           0 :                 g = av_clip_uintp2(g, 2);
    1746           0 :                 b = av_clip_uintp2(b, 1);
    1747             :             }
    1748             : 
    1749           0 :             break;
    1750             :         }
    1751             : 
                               /* Pack the quantised components into a single byte in the target's bit order. */
    1752    40831985 :         if(target == AV_PIX_FMT_BGR4_BYTE) {
    1753     8166397 :             dest[0] = r + 2*g + 8*b;
    1754    32665588 :         } else if(target == AV_PIX_FMT_RGB4_BYTE) {
    1755     8166397 :             dest[0] = b + 2*g + 8*r;
    1756    24499191 :         } else if(target == AV_PIX_FMT_BGR8) {
    1757    16332794 :             dest[0] = r + 8*g + 64*b;
    1758     8166397 :         } else if(target == AV_PIX_FMT_RGB8) {
    1759     8166397 :             dest[0] = b + 4*g + 32*r;
    1760             :         } else
    1761             :             av_assert2(0);
    1762    40831985 :         break;}
    1763             :     }
    1764   110778895 : }
    1765             :
                       /*
                        * Produce one output line of dstW pixels by applying the vertical filters
                        * to the source lines and passing each result to yuv2rgb_write_full().
                        *
                        * Luma (and alpha, when hasAlpha is set) is the sum of lumSrc[j][i] *
                        * lumFilter[j] over lumFilterSize taps; chroma uses chrFilter over
                        * chrFilterSize taps.
                        *
                        * dest advances per pixel by 3 bytes for RGB24/BGR24, 1 byte for the
                        * RGB8/BGR8/RGB4_BYTE/BGR4_BYTE targets and 4 bytes otherwise.
                        */
    1766             : static av_always_inline void
    1767        7656 : yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
    1768             :                           const int16_t **lumSrc, int lumFilterSize,
    1769             :                           const int16_t *chrFilter, const int16_t **chrUSrc,
    1770             :                           const int16_t **chrVSrc, int chrFilterSize,
    1771             :                           const int16_t **alpSrc, uint8_t *dest,
    1772             :                           int dstW, int y, enum AVPixelFormat target, int hasAlpha)
    1773             : {
    1774             :     int i;
    1775        7656 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1776        7656 :     int err[4] = {0};
    1777        7656 :     int A = 0; //init to silence warning
    1778             : 
    1779        7656 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1780        7456 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1781         500 :         step = 1;
    1782             : 
    1783     3633742 :     for (i = 0; i < dstW; i++) {
    1784             :         int j;
                               /* 1 << 9 is half of the >> 10 below; U and V additionally start at -(128 << 19). */
    1785     3626086 :         int Y = 1<<9;
    1786     3626086 :         int U = (1<<9)-(128 << 19);
    1787     3626086 :         int V = (1<<9)-(128 << 19);
    1788             : 
    1789    20564230 :         for (j = 0; j < lumFilterSize; j++) {
    1790    16938144 :             Y += lumSrc[j][i] * lumFilter[j];
    1791             :         }
    1792    20564230 :         for (j = 0; j < chrFilterSize; j++) {
    1793    16938144 :             U += chrUSrc[j][i] * chrFilter[j];
    1794    16938144 :             V += chrVSrc[j][i] * chrFilter[j];
    1795             :         }
    1796     3626086 :         Y >>= 10;
    1797     3626086 :         U >>= 10;
    1798     3626086 :         V >>= 10;
    1799     3626086 :         if (hasAlpha) {
                                   /* Alpha is filtered with the luma coefficients; 1 << 18 is half of the >> 19 below. */
    1800     3339386 :             A = 1 << 18;
    1801    17976930 :             for (j = 0; j < lumFilterSize; j++) {
    1802    14637544 :                 A += alpSrc[j][i] * lumFilter[j];
    1803             :             }
    1804     3339386 :             A >>= 19;
                                   /* Clip only when bit 8 is set. */
    1805     3339386 :             if (A & 0x100)
    1806           0 :                 A = av_clip_uint8(A);
    1807             :         }
    1808     3626086 :         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1809     3626086 :         dest += step;
    1810             :     }
                           /* Here i == dstW: store the error left after the last pixel one slot past it. */
    1811        7656 :     c->dither_error[0][i] = err[0];
    1812        7656 :     c->dither_error[1][i] = err[1];
    1813        7656 :     c->dither_error[2][i] = err[2];
    1814        7656 : }
    1815             :
                       /*
                        * Produce one output line of dstW pixels by blending two source lines and
                        * passing each result to yuv2rgb_write_full().
                        *
                        * Line 0 is weighted by 4096 - alpha and line 1 by alpha, with yalpha used
                        * for luma and alpha samples and uvalpha for chroma. The abuf lines are
                        * only read when hasAlpha is non-zero.
                        *
                        * dest advances per pixel by 3 bytes for RGB24/BGR24, 1 byte for the
                        * RGB8/BGR8/RGB4_BYTE/BGR4_BYTE targets and 4 bytes otherwise.
                        */
    1816             : static av_always_inline void
    1817           0 : yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
    1818             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1819             :                      const int16_t *abuf[2], uint8_t *dest, int dstW,
    1820             :                      int yalpha, int uvalpha, int y,
    1821             :                      enum AVPixelFormat target, int hasAlpha)
    1822             : {
    1823           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    1824           0 :                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
    1825           0 :                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
    1826           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    1827           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
    1828           0 :     int  yalpha1 = 4096 - yalpha;
    1829           0 :     int uvalpha1 = 4096 - uvalpha;
    1830             :     int i;
    1831           0 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1832           0 :     int err[4] = {0};
    1833           0 :     int A = 0; // init to silence warning
    1834             : 
    1835             :     av_assert2(yalpha  <= 4096U);
    1836             :     av_assert2(uvalpha <= 4096U);
    1837             : 
    1838           0 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1839           0 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1840           0 :         step = 1;
    1841             : 
    1842           0 :     for (i = 0; i < dstW; i++) {
                               /* U and V have 128 << 19 subtracted before the shift; none of the three adds a rounding term. */
    1843           0 :         int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha             ) >> 10; //FIXME rounding
    1844           0 :         int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10;
    1845           0 :         int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10;
    1846             : 
    1847           0 :         if (hasAlpha) {
                                   /* Alpha is rounded (1 << 18 before >> 19) and clipped only when bit 8 is set. */
    1848           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19;
    1849           0 :             if (A & 0x100)
    1850           0 :                 A = av_clip_uint8(A);
    1851             :         }
    1852             : 
    1853           0 :         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1854           0 :         dest += step;
    1855             :     }
                           /* Here i == dstW: store the error left after the last pixel one slot past it. */
    1856           0 :     c->dither_error[0][i] = err[0];
    1857           0 :     c->dither_error[1][i] = err[1];
    1858           0 :     c->dither_error[2][i] = err[2];
    1859           0 : }
    1860             :
                       /*
                        * Produce one output line of dstW pixels from a single luma line and pass
                        * each result to yuv2rgb_write_full().
                        *
                        * When uvalpha < 2048 only ubuf[0]/vbuf[0] are read; otherwise the samples
                        * of both chroma lines are added. abuf0 is only read when hasAlpha is
                        * non-zero.
                        *
                        * The scaling of Y, U and V is written as a multiplication rather than a
                        * left shift: the operands can be negative (U and V always are for samples
                        * below the 128 bias), and left-shifting a negative value is undefined
                        * behaviour in C.
                        *
                        * dest advances per pixel by 3 bytes for RGB24/BGR24, 1 byte for the
                        * RGB8/BGR8/RGB4_BYTE/BGR4_BYTE targets and 4 bytes otherwise.
                        */
    1861             : static av_always_inline void
    1862      387713 : yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
    1863             :                      const int16_t *ubuf[2], const int16_t *vbuf[2],
    1864             :                      const int16_t *abuf0, uint8_t *dest, int dstW,
    1865             :                      int uvalpha, int y, enum AVPixelFormat target,
    1866             :                      int hasAlpha)
    1867             : {
    1868      387713 :     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    1869             :     int i;
    1870      387713 :     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
    1871      387713 :     int err[4] = {0};
    1872             : 
    1873      387713 :     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
    1874      312631 :        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
    1875      187705 :         step = 1;
    1876             : 
    1877      387713 :     if (uvalpha < 2048) {
    1878      387713 :         int A = 0; //init to silence warning
    1879   107540522 :         for (i = 0; i < dstW; i++) {
    1880   107152809 :             int Y = buf0[i] * 4;
    1881   107152809 :             int U = (ubuf0[i] - (128<<7)) * 4;
    1882   107152809 :             int V = (vbuf0[i] - (128<<7)) * 4;
    1883             : 
    1884   107152809 :             if (hasAlpha) {
                                       /* Rounded >> 7, clipped only when bit 8 is set. */
    1885           0 :                 A = (abuf0[i] + 64) >> 7;
    1886           0 :                 if (A & 0x100)
    1887           0 :                     A = av_clip_uint8(A);
    1888             :             }
    1889             : 
    1890   107152809 :             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1891   107152809 :             dest += step;
    1892             :         }
    1893             :     } else {
    1894           0 :         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
    1895           0 :         int A = 0; //init to silence warning
    1896           0 :         for (i = 0; i < dstW; i++) {
    1897           0 :             int Y = buf0[i] * 4;
                                   /* The sum of two lines carries twice the bias (128 << 8) and needs only a factor of 2. */
    1898           0 :             int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2;
    1899           0 :             int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2;
    1900             : 
    1901           0 :             if (hasAlpha) {
    1902           0 :                 A = (abuf0[i] + 64) >> 7;
    1903           0 :                 if (A & 0x100)
    1904           0 :                     A = av_clip_uint8(A);
    1905             :             }
    1906             : 
    1907           0 :             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
    1908           0 :             dest += step;
    1909             :         }
    1910             :     }
    1911             : 
                           /* Here i == dstW: store the error left after the last pixel one slot past it. */
    1912      387713 :     c->dither_error[0][i] = err[0];
    1913      387713 :     c->dither_error[1][i] = err[1];
    1914      387713 :     c->dither_error[2][i] = err[2];
    1915      387713 : }
    1916             :
                       /*
                        * Instantiate the _X_c / _2_c / _1_c wrappers around the
                        * yuv2rgb_full_*_c_template functions.
                        *
                        * With CONFIG_SMALL each 32-bit format gets one variant whose hasAlpha is
                        * the run-time expression CONFIG_SWSCALE_ALPHA && c->needAlpha. Otherwise
                        * the variants with hasAlpha == 1 exist only when CONFIG_SWSCALE_ALPHA is
                        * set, and the "x" variants pass the same pixel format with hasAlpha == 0.
                        * The 24-bit and one-byte targets always use hasAlpha == 0.
                        */
    1917             : #if CONFIG_SMALL
    1918             : YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1919             : YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1920             : YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1921             : YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
    1922             : #else
    1923             : #if CONFIG_SWSCALE_ALPHA
    1924        5366 : YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
    1925         200 : YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
    1926         200 : YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
    1927         200 : YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
    1928             : #endif
    1929       63900 : YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
    1930           0 : YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
    1931           0 : YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
    1932           0 : YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
    1933             : #endif
    1934       89900 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
    1935       47398 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)
    1936             : 
    1937       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
    1938       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
    1939       75282 : YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
    1940       37641 : YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
    1941             :
                       /*
                        * Vertically filter one line and write it as planar RGB: dest[0] receives
                        * G, dest[1] B, dest[2] R and, when alpha is output, dest[3] A.
                        *
                        * Alpha is written only if the descriptor of c->dstFormat has
                        * AV_PIX_FMT_FLAG_ALPHA and alpSrc is non-NULL.
                        *
                        * SH is the shift from the 30-bit intermediate to the component depth of
                        * the destination (22 for 8 bits). For any other depth the planes are
                        * written as uint16_t and byte-swapped afterwards when the endianness of
                        * c->dstFormat differs from HAVE_BIGENDIAN.
                        */
    1942             : static void
    1943      105516 : yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
    1944             :                   const int16_t **lumSrc, int lumFilterSize,
    1945             :                   const int16_t *chrFilter, const int16_t **chrUSrc,
    1946             :                   const int16_t **chrVSrc, int chrFilterSize,
    1947             :                   const int16_t **alpSrc, uint8_t **dest,
    1948             :                   int dstW, int y)
    1949             : {
    1950      105516 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    1951             :     int i;
    1952      105516 :     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
    1953      105516 :     uint16_t **dest16 = (uint16_t**)dest;
    1954      105516 :     int SH = 22 + 8 - desc->comp[0].depth;
    1955      105516 :     int A = 0; // init to silence warning
    1956             : 
    1957    36926604 :     for (i = 0; i < dstW; i++) {
    1958             :         int j;
                               /* 1 << 9 is half of the >> 10 below; U and V additionally start at -(128 << 19). */
    1959    36821088 :         int Y = 1 << 9;
    1960    36821088 :         int U = (1 << 9) - (128 << 19);
    1961    36821088 :         int V = (1 << 9) - (128 << 19);
    1962             :         int R, G, B;
    1963             : 
    1964    76722176 :         for (j = 0; j < lumFilterSize; j++)
    1965    39901088 :             Y += lumSrc[j][i] * lumFilter[j];
    1966             : 
    1967   186208256 :         for (j = 0; j < chrFilterSize; j++) {
    1968   149387168 :             U += chrUSrc[j][i] * chrFilter[j];
    1969   149387168 :             V += chrVSrc[j][i] * chrFilter[j];
    1970             :         }
    1971             : 
    1972    36821088 :         Y >>= 10;
    1973    36821088 :         U >>= 10;
    1974    36821088 :         V >>= 10;
    1975             : 
    1976    36821088 :         if (hasAlpha) {
                                   /* Alpha keeps its full filter precision here; it is shifted down only when stored. */
    1977      145728 :             A = 1 << 18;
    1978             : 
    1979     1391456 :             for (j = 0; j < lumFilterSize; j++)
    1980     1245728 :                 A += alpSrc[j][i] * lumFilter[j];
    1981             : 
    1982      145728 :             if (A & 0xF8000000)
    1983       40000 :                 A =  av_clip_uintp2(A, 27);
    1984             :         }
    1985             : 
    1986    36821088 :         Y -= c->yuv2rgb_y_offset;
    1987    36821088 :         Y *= c->yuv2rgb_y_coeff;
                               /* Half of the final >> SH step. */
    1988    36821088 :         Y += 1 << (SH-1);
    1989    36821088 :         R = Y + V * c->yuv2rgb_v2r_coeff;
    1990    36821088 :         G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    1991    36821088 :         B = Y +                            U * c->yuv2rgb_u2b_coeff;
    1992             : 
                               /* Skip the three clips unless some component left [0, 2^30). */
    1993    36821088 :         if ((R | G | B) & 0xC0000000) {
    1994     7133678 :             R = av_clip_uintp2(R, 30);
    1995     7133678 :             G = av_clip_uintp2(G, 30);
    1996     7133678 :             B = av_clip_uintp2(B, 30);
    1997             :         }
    1998             : 
    1999    36821088 :         if (SH != 22) {
                                   /* Alpha is 27 bits wide where R/G/B are 30, hence the shift by SH - 3. */
    2000    21123456 :             dest16[0][i] = G >> SH;
    2001    21123456 :             dest16[1][i] = B >> SH;
    2002    21123456 :             dest16[2][i] = R >> SH;
    2003    21123456 :             if (hasAlpha)
    2004       80000 :                 dest16[3][i] = A >> (SH - 3);
    2005             :         } else {
    2006    15697632 :             dest[0][i] = G >> 22;
    2007    15697632 :             dest[1][i] = B >> 22;
    2008    15697632 :             dest[2][i] = R >> 22;
    2009    15697632 :             if (hasAlpha)
    2010       65728 :                 dest[3][i] = A >> 19;
    2011             :         }
    2012             :     }
                           /* The values above were stored in native byte order; swap them if the format asks for the other one. */
    2013      105516 :     if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
    2014     8660376 :         for (i = 0; i < dstW; i++) {
    2015     8635584 :             dest16[0][i] = av_bswap16(dest16[0][i]);
    2016     8635584 :             dest16[1][i] = av_bswap16(dest16[1][i]);
    2017     8635584 :             dest16[2][i] = av_bswap16(dest16[2][i]);
    2018     8635584 :             if (hasAlpha)
    2019       40000 :                 dest16[3][i] = av_bswap16(dest16[3][i]);
    2020             :         }
    2021             :     }
    2022      105516 : }
    2023             :
                       /*
                        * Vertically filter one line of 32-bit intermediate samples and write it as
                        * 16-bit planar RGB: dest[0] receives G, dest[1] B, dest[2] R and, when
                        * alpha is output, dest[3] A.
                        *
                        * The int16_t source pointers are reinterpreted as pointers to int32_t
                        * lines. Alpha is written only if the descriptor of c->dstFormat has
                        * AV_PIX_FMT_FLAG_ALPHA and alpSrcx is non-NULL.
                        *
                        * All filter products are formed in unsigned arithmetic: a 32-bit sample
                        * times a coefficient can exceed the range of int, and signed overflow is
                        * undefined behaviour, whereas the unsigned product wraps.
                        *
                        * The planes are byte-swapped afterwards when the endianness of
                        * c->dstFormat differs from HAVE_BIGENDIAN.
                        */
    2024             : static void
    2025       46840 : yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
    2026             :                     const int16_t **lumSrcx, int lumFilterSize,
    2027             :                     const int16_t *chrFilter, const int16_t **chrUSrcx,
    2028             :                     const int16_t **chrVSrcx, int chrFilterSize,
    2029             :                     const int16_t **alpSrcx, uint8_t **dest,
    2030             :                     int dstW, int y)
    2031             : {
    2032       46840 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
    2033             :     int i;
    2034       46840 :     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
    2035       46840 :     uint16_t **dest16 = (uint16_t**)dest;
    2036       46840 :     const int32_t **lumSrc  = (const int32_t**)lumSrcx;
    2037       46840 :     const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
    2038       46840 :     const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
    2039       46840 :     const int32_t **alpSrc  = (const int32_t**)alpSrcx;
    2040             : 
    2041    16372920 :     for (i = 0; i < dstW; i++) {
    2042             :         int j;
                               /* Y starts from a negative bias that is compensated by the + 0x10000 after the shift. */
    2043    16326080 :         int Y = -0x40000000;
    2044    16326080 :         int U = -(128 << 23);
    2045    16326080 :         int V = -(128 << 23);
    2046             :         int R, G, B, A;
    2047             : 
    2048    33532160 :         for (j = 0; j < lumFilterSize; j++)
    2049    17206080 :             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
    2050             : 
    2051    82192640 :         for (j = 0; j < chrFilterSize; j++) {
    2052    65866560 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    2053    65866560 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    2054             :         }
    2055             : 
    2056    16326080 :         Y >>= 14;
    2057    16326080 :         Y += 0x10000;
    2058    16326080 :         U >>= 14;
    2059    16326080 :         V >>= 14;
    2060             : 
    2061    16326080 :         if (hasAlpha) {
    2062       52960 :             A = -0x40000000;
    2063             : 
                                   /* Same unsigned product as for Y/U/V above, so an overflowing sum wraps. */
    2064      545920 :             for (j = 0; j < lumFilterSize; j++)
    2065      492960 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    2066             : 
    2067       52960 :             A >>= 1;
    2068       52960 :             A += 0x20002000;
    2069             :         }
    2070             : 
    2071    16326080 :         Y -= c->yuv2rgb_y_offset;
    2072    16326080 :         Y *= c->yuv2rgb_y_coeff;
                               /* Half of the final >> 14 step. */
    2073    16326080 :         Y += 1 << 13;
    2074    16326080 :         R = V * c->yuv2rgb_v2r_coeff;
    2075    16326080 :         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
    2076    16326080 :         B =                            U * c->yuv2rgb_u2b_coeff;
    2077             : 
    2078    16326080 :         R = av_clip_uintp2(Y + R, 30);
    2079    16326080 :         G = av_clip_uintp2(Y + G, 30);
    2080    16326080 :         B = av_clip_uintp2(Y + B, 30);
    2081             : 
    2082    16326080 :         dest16[0][i] = G >> 14;
    2083    16326080 :         dest16[1][i] = B >> 14;
    2084    16326080 :         dest16[2][i] = R >> 14;
    2085    16326080 :         if (hasAlpha)
    2086       52960 :             dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
    2087             :     }
                           /* The values above were stored in native byte order; swap them if the format asks for the other one. */
    2088       46840 :     if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
    2089    13079472 :         for (i = 0; i < dstW; i++) {
    2090    13042048 :             dest16[0][i] = av_bswap16(dest16[0][i]);
    2091    13042048 :             dest16[1][i] = av_bswap16(dest16[1][i]);
    2092    13042048 :             dest16[2][i] = av_bswap16(dest16[2][i]);
    2093    13042048 :             if (hasAlpha)
    2094       32960 :                 dest16[3][i] = av_bswap16(dest16[3][i]);
    2095             :         }
    2096             :     }
    2097       46840 : }
    2098             :
                       /*
                        * Write one line of dstW interleaved (luma, alpha) byte pairs from a single
                        * luma line.
                        *
                        * Alpha is taken from abuf0 when it is non-NULL; otherwise 255 is stored.
                        * c, ubuf, vbuf, uvalpha and y are not used by this function.
                        */
    2099             : static void
    2100           0 : yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
    2101             :             const int16_t *ubuf[2], const int16_t *vbuf[2],
    2102             :             const int16_t *abuf0, uint8_t *dest, int dstW,
    2103             :             int uvalpha, int y)
    2104             : {
    2105           0 :     int hasAlpha = !!abuf0;
    2106             :     int i;
    2107             : 
    2108           0 :     for (i = 0; i < dstW; i++) {
                               /* Rounded >> 7 of the 16-bit intermediate. */
    2109           0 :         int Y = (buf0[i] + 64) >> 7;
    2110             :         int A;
    2111             : 
    2112           0 :         Y = av_clip_uint8(Y);
    2113             : 
    2114           0 :         if (hasAlpha) {
                                   /* Unlike Y, alpha is clipped only when bit 8 is set. */
    2115           0 :             A = (abuf0[i] + 64) >> 7;
    2116           0 :             if (A & 0x100)
    2117           0 :                 A = av_clip_uint8(A);
    2118             :         }
    2119             : 
    2120           0 :         dest[i * 2    ] = Y;
    2121           0 :         dest[i * 2 + 1] = hasAlpha ? A : 255;
    2122             :     }
    2123           0 : }
    2124             : 
    2125             : static void
    2126           0 : yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2],
    2127             :             const int16_t *ubuf[2], const int16_t *vbuf[2],
    2128             :             const int16_t *abuf[2], uint8_t *dest, int dstW,
    2129             :             int yalpha, int uvalpha, int y)
    2130             : {
                           /* Write dstW two-byte pixels to dest by blending two input lines:
                            * each luma sample is buf[0][i] * (4096 - yalpha) + buf[1][i] * yalpha,
                            * shifted right by 19 and clipped to 0..255. Alpha is blended the same
                            * way from abuf[0] / abuf[1], but only when abuf and both of its
                            * entries are non-NULL; otherwise the alpha byte is the constant 255.
                            * c, ubuf, vbuf, uvalpha and y are not used by this function. */
    2131           0 :     int hasAlpha = abuf && abuf[0] && abuf[1];
    2132           0 :     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
    2133           0 :                   *abuf0 = hasAlpha ? abuf[0] : NULL,
    2134           0 :                   *abuf1 = hasAlpha ? abuf[1] : NULL;
                           /* Weight of the first line; the two weights always sum to 4096. */
    2135           0 :     int  yalpha1 = 4096 - yalpha;
    2136             :     int i;
    2137             : 
                           /* The unsigned constant makes this an unsigned comparison, so a
                            * negative yalpha fails the check as well as one above 4096. */
    2138             :     av_assert2(yalpha  <= 4096U);
    2139             : 
    2140           0 :     for (i = 0; i < dstW; i++) {
                               /* The weights carry a scale of 4096 (1 << 12); the shift by 19
                                * removes that scale plus 7 more bits. No rounding offset is
                                * added before the shift. */
    2141           0 :         int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19;
    2142             :         int A;
    2143             : 
    2144           0 :         Y = av_clip_uint8(Y);
    2145             : 
    2146           0 :         if (hasAlpha) {
    2147           0 :             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
    2148           0 :             A = av_clip_uint8(A);
    2149             :         }
    2150             : 
    2151           0 :         dest[i * 2    ] = Y;
                               /* A is only assigned (and only read) when hasAlpha is set. */
    2152           0 :         dest[i * 2 + 1] = hasAlpha ? A : 255;
    2153             :     }
    2154           0 : }
    2155             : 
    2156             : static void
    2157        4420 : yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter,
    2158             :             const int16_t **lumSrc, int lumFilterSize,
    2159             :             const int16_t *chrFilter, const int16_t **chrUSrc,
    2160             :             const int16_t **chrVSrc, int chrFilterSize,
    2161             :             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
    2162             : {
                           /* Write dstW two-byte pixels to dest, each computed as a weighted
                            * sum over lumFilterSize input lines: luma from lumSrc[j][i] and,
                            * when alpSrc is non-NULL, alpha from alpSrc[j][i]. Both sums use the
                            * lumFilter coefficients. Without alpSrc the alpha byte is 255.
                            * c, chrFilter, chrUSrc, chrVSrc, chrFilterSize and y are not used
                            * by this function. */
    2163        4420 :     int hasAlpha = !!alpSrc;
    2164             :     int i;
    2165             : 
    2166     1545060 :     for (i = 0; i < dstW; i++) {
    2167             :         int j;
                               /* Both accumulators start at 1 << 18, half of 1 << 19, so the
                                * later shift by 19 rounds to nearest. */
    2168     1540640 :         int Y = 1 << 18, A = 1 << 18;
    2169             : 
    2170     3301280 :         for (j = 0; j < lumFilterSize; j++)
    2171     1760640 :             Y += lumSrc[j][i] * lumFilter[j];
    2172             : 
    2173     1540640 :         Y >>= 19;
                               /* Bit 8 is set for 256..511 and (with an arithmetic shift) for
                                * -256..-1, so the clip is skipped for values already in 0..255.
                                * NOTE(review): values outside [-256, 511] could have bit 8 clear
                                * and would then be stored unclipped; presumably the filter
                                * output never leaves that range - confirm. */
    2174     1540640 :         if (Y  & 0x100)
    2175        8564 :             Y = av_clip_uint8(Y);
    2176             : 
    2177     1540640 :         if (hasAlpha) {
                                   /* Alpha reuses the luma filter taps and tap count. */
    2178      260000 :             for (j = 0; j < lumFilterSize; j++)
    2179      240000 :                 A += alpSrc[j][i] * lumFilter[j];
    2180             : 
    2181       20000 :             A >>= 19;
    2182             : 
    2183       20000 :             if (A & 0x100)
    2184           0 :                 A = av_clip_uint8(A);
    2185             :         }
    2186             : 
    2187     1540640 :         dest[2 * i    ] = Y;
    2188     1540640 :         dest[2 * i + 1] = hasAlpha ? A : 255;
    2189             :     }
    2190        4420 : }
    2191             : 
    2192             : static void
    2193        4132 : yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
    2194             :                  const int16_t **_lumSrc, int lumFilterSize,
    2195             :                  const int16_t *chrFilter, const int16_t **_chrUSrc,
    2196             :                  const int16_t **_chrVSrc, int chrFilterSize,
    2197             :                  const int16_t **_alpSrc, uint8_t *dest, int dstW, int y)
    2198             : {
                           /* Write dstW pixels of 8 bytes each to dest. Every pixel holds four
                            * 16-bit values stored with AV_WL16 in the order A, Y, U, V. Y and A
                            * are weighted sums over lumFilterSize lines using lumFilter; U and V
                            * are weighted sums over chrFilterSize lines using chrFilter. When
                            * _alpSrc is NULL the alpha value written is the constant 65535.
                            * c and y are not used by this function. */
                           /* The line arrays arrive typed as int16_t ** but are read as lines
                            * of int32_t samples. */
    2199        4132 :     const int32_t **lumSrc  = (const int32_t **) _lumSrc,
    2200        4132 :                   **chrUSrc = (const int32_t **) _chrUSrc,
    2201        4132 :                   **chrVSrc = (const int32_t **) _chrVSrc,
    2202        4132 :                   **alpSrc  = (const int32_t **) _alpSrc;
    2203        4132 :     int hasAlpha = !!alpSrc;
    2204             :     int i;
    2205             : 
    2206     1443396 :     for (i = 0; i < dstW; i++) {
                               /* 1 << 14 is half of 1 << 15: rounding term for the shift by 15
                                * applied after accumulation. */
    2207     1439264 :         int Y = 1 << 14, U = 1 << 14;
    2208     1439264 :         int V = 1 << 14, A = 1 << 14;
    2209             :         int j;
    2210             : 
                               /* 0x40000000 >> 15 == 0x8000, so this bias is exactly what the
                                * "0x8000 +" after the shift adds back.
                                * NOTE(review): presumably the bias re-centres the sum so that it
                                * fits a signed int and can be clipped with av_clip_int16 -
                                * confirm. */
    2211     1439264 :         Y -= 0x40000000;
    2212     1439264 :         U -= 0x40000000;
    2213     1439264 :         V -= 0x40000000;
    2214     1439264 :         A -= 0x40000000;
    2215             : 
                               /* The (unsigned) cast makes each multiply and add unsigned, so
                                * intermediate results wrap instead of overflowing a signed int. */
    2216     3098528 :         for (j = 0; j < lumFilterSize; j++)
    2217     1659264 :             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
    2218             : 
    2219     7356320 :         for (j = 0; j < chrFilterSize; j++)
    2220     5917056 :             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
    2221             : 
    2222     7356320 :         for (j = 0; j < chrFilterSize; j++)
    2223     5917056 :             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
    2224             : 
                               /* Alpha is filtered with the luma taps and tap count. */
    2225     1439264 :         if (hasAlpha)
    2226      260000 :             for (j = 0; j < lumFilterSize; j++)
    2227      240000 :                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
    2228             : 
                               /* Clip to the int16 range, then offset by 0x8000. A is computed
                                * here even without alpha, but is only written when hasAlpha is
                                * set. */
    2229     1439264 :         Y = 0x8000 + av_clip_int16(Y >> 15);
    2230     1439264 :         U = 0x8000 + av_clip_int16(U >> 15);
    2231     1439264 :         V = 0x8000 + av_clip_int16(V >> 15);
    2232     1439264 :         A = 0x8000 + av_clip_int16(A >> 15);
    2233             : 
                               /* Byte offsets within the pixel: A at 0, Y at 2, U at 4, V at 6. */
    2234     1439264 :         AV_WL16(dest + 8 * i, hasAlpha ? A : 65535);
    2235     1439264 :         AV_WL16(dest + 8 * i + 2, Y);
    2236     1439264 :         AV_WL16(dest + 8 * i + 4, U);
    2237     1439264 :         AV_WL16(dest + 8 * i + 6, V);
    2238             :     }
    2239        4132 : }
    2240             : 
    2241       63464 : av_cold void ff_sws_init_output_funcs(SwsContext *c,
    2242             :                                       yuv2planar1_fn *yuv2plane1,
    2243             :                                       yuv2planarX_fn *yuv2planeX,
    2244             :                                       yuv2interleavedX_fn *yuv2nv12cX,
    2245             :                                       yuv2packed1_fn *yuv2packed1,
    2246             :                                       yuv2packed2_fn *yuv2packed2,
    2247             :                                       yuv2packedX_fn *yuv2packedX,
    2248             :                                       yuv2anyX_fn *yuv2anyX)
    2249             : {
    2250       63464 :     enum AVPixelFormat dstFormat = c->dstFormat;
    2251       63464 :     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
    2252             : 
    2253       63464 :     if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
    2254          41 :         *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
    2255          41 :         *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
    2256          41 :         *yuv2nv12cX = yuv2p010cX_c;
    2257       63423 :     } else if (is16BPS(dstFormat)) {
    2258        3256 :         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
    2259        3256 :         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
    2260       60167 :     } else if (isNBPS(dstFormat)) {
    2261        5983 :         if (desc->comp[0].depth == 9) {
    2262         561 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
    2263         561 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
    2264        5422 :         } else if (desc->comp[0].depth == 10) {
    2265        4441 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
    2266        4441 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
    2267         981 :         } else if (desc->comp[0].depth == 12) {
    2268         699 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
    2269         699 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
    2270         282 :         } else if (desc->comp[0].depth == 14) {
    2271         282 :             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c  : yuv2planeX_14LE_c;
    2272         282 :             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
    2273             :         } else
    2274           0 :             av_assert0(0);
    2275             :     } else {
    2276       54184 :         *yuv2plane1 = yuv2plane1_8_c;
    2277       54184 :         *yuv2planeX = yuv2planeX_8_c;
    2278       54184 :         if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
    2279           6 :             *yuv2nv12cX = yuv2nv12cX_c;
    2280             :     }
    2281             : 
    2282       63464 :     if(c->flags & SWS_FULL_CHR_H_INT) {
    2283        5150 :         switch (dstFormat) {
    2284           6 :             case AV_PIX_FMT_RGBA:
    2285             : #if CONFIG_SMALL
    2286             :                 *yuv2packedX = yuv2rgba32_full_X_c;
    2287             :                 *yuv2packed2 = yuv2rgba32_full_2_c;
    2288             :                 *yuv2packed1 = yuv2rgba32_full_1_c;
    2289             : #else
    2290             : #if CONFIG_SWSCALE_ALPHA
    2291           6 :                 if (c->needAlpha) {
    2292           6 :                     *yuv2packedX = yuv2rgba32_full_X_c;
    2293           6 :                     *yuv2packed2 = yuv2rgba32_full_2_c;
    2294           6 :                     *yuv2packed1 = yuv2rgba32_full_1_c;
    2295             :                 } else
    2296             : #endif /* CONFIG_SWSCALE_ALPHA */
    2297             :                 {
    2298           0 :                     *yuv2packedX = yuv2rgbx32_full_X_c;
    2299           0 :                     *yuv2packed2 = yuv2rgbx32_full_2_c;
    2300           0 :                     *yuv2packed1 = yuv2rgbx32_full_1_c;
    2301             :                 }
    2302             : #endif /* !CONFIG_SMALL */
    2303           6 :                 break;
    2304           6 :             case AV_PIX_FMT_ARGB:
    2305             : #if CONFIG_SMALL
    2306             :                 *yuv2packedX = yuv2argb32_full_X_c;
    2307             :                 *yuv2packed2 = yuv2argb32_full_2_c;
    2308             :                 *yuv2packed1 = yuv2argb32_full_1_c;
    2309             : #else
    2310             : #if CONFIG_SWSCALE_ALPHA
    2311           6 :                 if (c->needAlpha) {
    2312           6 :                     *yuv2packedX = yuv2argb32_full_X_c;
    2313           6 :                     *yuv2packed2 = yuv2argb32_full_2_c;
    2314           6 :                     *yuv2packed1 = yuv2argb32_full_1_c;
    2315             :                 } else
    2316             : #endif /* CONFIG_SWSCALE_ALPHA */
    2317             :                 {
    2318           0 :                     *yuv2packedX = yuv2xrgb32_full_X_c;
    2319           0 :                     *yuv2packed2 = yuv2xrgb32_full_2_c;
    2320           0 :                     *yuv2packed1 = yuv2xrgb32_full_1_c;
    2321             :                 }
    2322             : #endif /* !CONFIG_SMALL */
    2323           6 :                 break;
    2324         739 :             case AV_PIX_FMT_BGRA:
    2325             : #if CONFIG_SMALL
    2326             :                 *yuv2packedX = yuv2bgra32_full_X_c;
    2327             :                 *yuv2packed2 = yuv2bgra32_full_2_c;
    2328             :                 *yuv2packed1 = yuv2bgra32_full_1_c;
    2329             : #else
    2330             : #if CONFIG_SWSCALE_ALPHA
    2331         739 :                 if (c->needAlpha) {
    2332          24 :                     *yuv2packedX = yuv2bgra32_full_X_c;
    2333          24 :                     *yuv2packed2 = yuv2bgra32_full_2_c;
    2334          24 :                     *yuv2packed1 = yuv2bgra32_full_1_c;
    2335             :                 } else
    2336             : #endif /* CONFIG_SWSCALE_ALPHA */
    2337             :                 {
    2338         715 :                     *yuv2packedX = yuv2bgrx32_full_X_c;
    2339         715 :                     *yuv2packed2 = yuv2bgrx32_full_2_c;
    2340         715 :                     *yuv2packed1 = yuv2bgrx32_full_1_c;
    2341             :                 }
    2342             : #endif /* !CONFIG_SMALL */
    2343         739 :                 break;
    2344           6 :             case AV_PIX_FMT_ABGR:
    2345             : #if CONFIG_SMALL
    2346             :                 *yuv2packedX = yuv2abgr32_full_X_c;
    2347             :                 *yuv2packed2 = yuv2abgr32_full_2_c;
    2348             :                 *yuv2packed1 = yuv2abgr32_full_1_c;
    2349             : #else
    2350             : #if CONFIG_SWSCALE_ALPHA
    2351           6 :                 if (c->needAlpha) {
    2352           6 :                     *yuv2packedX = yuv2abgr32_full_X_c;
    2353           6 :                     *yuv2packed2 = yuv2abgr32_full_2_c;
    2354           6 :                     *yuv2packed1 = yuv2abgr32_full_1_c;
    2355             :                 } else
    2356             : #endif /* CONFIG_SWSCALE_ALPHA */
    2357             :                 {
    2358           0 :                     *yuv2packedX = yuv2xbgr32_full_X_c;
    2359           0 :                     *yuv2packed2 = yuv2xbgr32_full_2_c;
    2360           0 :                     *yuv2packed1 = yuv2xbgr32_full_1_c;
    2361             :                 }
    2362             : #endif /* !CONFIG_SMALL */
    2363           6 :                 break;
    2364           3 :         case AV_PIX_FMT_RGBA64LE:
    2365             : #if CONFIG_SWSCALE_ALPHA
    2366           3 :             if (c->needAlpha) {
    2367           3 :                 *yuv2packedX = yuv2rgba64le_full_X_c;
    2368           3 :                 *yuv2packed2 = yuv2rgba64le_full_2_c;
    2369           3 :                 *yuv2packed1 = yuv2rgba64le_full_1_c;
    2370             :             } else
    2371             : #endif /* CONFIG_SWSCALE_ALPHA */
    2372             :             {
    2373           0 :                 *yuv2packedX = yuv2rgbx64le_full_X_c;
    2374           0 :                 *yuv2packed2 = yuv2rgbx64le_full_2_c;
    2375           0 :                 *yuv2packed1 = yuv2rgbx64le_full_1_c;
    2376             :             }
    2377           3 :             break;
    2378           3 :         case AV_PIX_FMT_RGBA64BE:
    2379             : #if CONFIG_SWSCALE_ALPHA
    2380           3 :             if (c->needAlpha) {
    2381           3 :                 *yuv2packedX = yuv2rgba64be_full_X_c;
    2382           3 :                 *yuv2packed2 = yuv2rgba64be_full_2_c;
    2383           3 :                 *yuv2packed1 = yuv2rgba64be_full_1_c;
    2384             :             } else
    2385             : #endif /* CONFIG_SWSCALE_ALPHA */
    2386             :             {
    2387           0 :                 *yuv2packedX = yuv2rgbx64be_full_X_c;
    2388           0 :                 *yuv2packed2 = yuv2rgbx64be_full_2_c;
    2389           0 :                 *yuv2packed1 = yuv2rgbx64be_full_1_c;
    2390             :             }
    2391           3 :             break;
    2392           3 :         case AV_PIX_FMT_BGRA64LE:
    2393             : #if CONFIG_SWSCALE_ALPHA
    2394           3 :             if (c->needAlpha) {
    2395           3 :                 *yuv2packedX = yuv2bgra64le_full_X_c;
    2396           3 :                 *yuv2packed2 = yuv2bgra64le_full_2_c;
    2397           3 :                 *yuv2packed1 = yuv2bgra64le_full_1_c;
    2398             :             } else
    2399             : #endif /* CONFIG_SWSCALE_ALPHA */
    2400             :             {
    2401           0 :                 *yuv2packedX = yuv2bgrx64le_full_X_c;
    2402           0 :                 *yuv2packed2 = yuv2bgrx64le_full_2_c;
    2403           0 :                 *yuv2packed1 = yuv2bgrx64le_full_1_c;
    2404             :             }
    2405           3 :             break;
    2406           3 :         case AV_PIX_FMT_BGRA64BE:
    2407             : #if CONFIG_SWSCALE_ALPHA
    2408           3 :             if (c->needAlpha) {
    2409           3 :                 *yuv2packedX = yuv2bgra64be_full_X_c;
    2410           3 :                 *yuv2packed2 = yuv2bgra64be_full_2_c;
    2411           3 :                 *yuv2packed1 = yuv2bgra64be_full_1_c;
    2412             :             } else
    2413             : #endif /* CONFIG_SWSCALE_ALPHA */
    2414             :             {
    2415           0 :                 *yuv2packedX = yuv2bgrx64be_full_X_c;
    2416           0 :                 *yuv2packed2 = yuv2bgrx64be_full_2_c;
    2417           0 :                 *yuv2packed1 = yuv2bgrx64be_full_1_c;
    2418             :             }
    2419           3 :             break;
    2420             : 
    2421         460 :         case AV_PIX_FMT_RGB24:
    2422         460 :             *yuv2packedX = yuv2rgb24_full_X_c;
    2423         460 :             *yuv2packed2 = yuv2rgb24_full_2_c;
    2424         460 :             *yuv2packed1 = yuv2rgb24_full_1_c;
    2425         460 :             break;
    2426         811 :         case AV_PIX_FMT_BGR24:
    2427         811 :             *yuv2packedX = yuv2bgr24_full_X_c;
    2428         811 :             *yuv2packed2 = yuv2bgr24_full_2_c;
    2429         811 :             *yuv2packed1 = yuv2bgr24_full_1_c;
    2430         811 :             break;
    2431           6 :         case AV_PIX_FMT_RGB48LE:
    2432           6 :             *yuv2packedX = yuv2rgb48le_full_X_c;
    2433           6 :             *yuv2packed2 = yuv2rgb48le_full_2_c;
    2434           6 :             *yuv2packed1 = yuv2rgb48le_full_1_c;
    2435           6 :             break;
    2436           3 :         case AV_PIX_FMT_BGR48LE:
    2437           3 :             *yuv2packedX = yuv2bgr48le_full_X_c;
    2438           3 :             *yuv2packed2 = yuv2bgr48le_full_2_c;
    2439           3 :             *yuv2packed1 = yuv2bgr48le_full_1_c;
    2440           3 :             break;
    2441           6 :         case AV_PIX_FMT_RGB48BE:
    2442           6 :             *yuv2packedX = yuv2rgb48be_full_X_c;
    2443           6 :             *yuv2packed2 = yuv2rgb48be_full_2_c;
    2444           6 :             *yuv2packed1 = yuv2rgb48be_full_1_c;
    2445           6 :             break;
    2446           3 :         case AV_PIX_FMT_BGR48BE:
    2447           3 :             *yuv2packedX = yuv2bgr48be_full_X_c;
    2448           3 :             *yuv2packed2 = yuv2bgr48be_full_2_c;
    2449           3 :             *yuv2packed1 = yuv2bgr48be_full_1_c;
    2450           3 :             break;
    2451         350 :         case AV_PIX_FMT_BGR4_BYTE:
    2452         350 :             *yuv2packedX = yuv2bgr4_byte_full_X_c;
    2453         350 :             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
    2454         350 :             *yuv2packed1 = yuv2bgr4_byte_full_1_c;
    2455         350 :             break;
    2456         350 :         case AV_PIX_FMT_RGB4_BYTE:
    2457         350 :             *yuv2packedX = yuv2rgb4_byte_full_X_c;
    2458         350 :             *yuv2packed2 = yuv2rgb4_byte_full_2_c;
    2459         350 :             *yuv2packed1 = yuv2rgb4_byte_full_1_c;
    2460         350 :             break;
    2461         700 :         case AV_PIX_FMT_BGR8:
    2462         700 :             *yuv2packedX = yuv2bgr8_full_X_c;
    2463         700 :             *yuv2packed2 = yuv2bgr8_full_2_c;
    2464         700 :             *yuv2packed1 = yuv2bgr8_full_1_c;
    2465         700 :             break;
    2466         350 :         case AV_PIX_FMT_RGB8:
    2467         350 :             *yuv2packedX = yuv2rgb8_full_X_c;
    2468         350 :             *yuv2packed2 = yuv2rgb8_full_2_c;
    2469         350 :             *yuv2packed1 = yuv2rgb8_full_1_c;
    2470         350 :             break;
    2471         942 :         case AV_PIX_FMT_GBRP:
    2472             :         case AV_PIX_FMT_GBRP9BE:
    2473             :         case AV_PIX_FMT_GBRP9LE:
    2474             :         case AV_PIX_FMT_GBRP10BE:
    2475             :         case AV_PIX_FMT_GBRP10LE:
    2476             :         case AV_PIX_FMT_GBRP12BE:
    2477             :         case AV_PIX_FMT_GBRP12LE:
    2478             :         case AV_PIX_FMT_GBRP14BE:
    2479             :         case AV_PIX_FMT_GBRP14LE:
    2480             :         case AV_PIX_FMT_GBRAP:
    2481             :         case AV_PIX_FMT_GBRAP10BE:
    2482             :         case AV_PIX_FMT_GBRAP10LE:
    2483             :         case AV_PIX_FMT_GBRAP12BE:
    2484             :         case AV_PIX_FMT_GBRAP12LE:
    2485         942 :             *yuv2anyX = yuv2gbrp_full_X_c;
    2486         942 :             break;
    2487         400 :         case AV_PIX_FMT_GBRP16BE:
    2488             :         case AV_PIX_FMT_GBRP16LE:
    2489             :         case AV_PIX_FMT_GBRAP16BE:
    2490             :         case AV_PIX_FMT_GBRAP16LE:
    2491         400 :             *yuv2anyX = yuv2gbrp16_full_X_c;
    2492         400 :             break;
    2493             :         }
    2494        5150 :         if (!*yuv2packedX && !*yuv2anyX)
    2495           0 :             goto YUV_PACKED;
    2496             :     } else {
    2497      116628 :         YUV_PACKED:
    2498       58314 :         switch (dstFormat) {
    2499          68 :         case AV_PIX_FMT_RGBA64LE:
    2500             : #if CONFIG_SWSCALE_ALPHA
    2501          68 :             if (c->needAlpha) {
    2502           0 :                 *yuv2packed1 = yuv2rgba64le_1_c;
    2503           0 :                 *yuv2packed2 = yuv2rgba64le_2_c;
    2504           0 :                 *yuv2packedX = yuv2rgba64le_X_c;
    2505             :             } else
    2506             : #endif /* CONFIG_SWSCALE_ALPHA */
    2507             :             {
    2508          68 :                 *yuv2packed1 = yuv2rgbx64le_1_c;
    2509          68 :                 *yuv2packed2 = yuv2rgbx64le_2_c;
    2510          68 :                 *yuv2packedX = yuv2rgbx64le_X_c;
    2511             :             }
    2512          68 :             break;
    2513          38 :         case AV_PIX_FMT_RGBA64BE:
    2514             : #if CONFIG_SWSCALE_ALPHA
    2515          38 :             if (c->needAlpha) {
    2516           0 :                 *yuv2packed1 = yuv2rgba64be_1_c;
    2517           0 :                 *yuv2packed2 = yuv2rgba64be_2_c;
    2518           0 :                 *yuv2packedX = yuv2rgba64be_X_c;
    2519             :             } else
    2520             : #endif /* CONFIG_SWSCALE_ALPHA */
    2521             :             {
    2522          38 :                 *yuv2packed1 = yuv2rgbx64be_1_c;
    2523          38 :                 *yuv2packed2 = yuv2rgbx64be_2_c;
    2524          38 :                 *yuv2packedX = yuv2rgbx64be_X_c;
    2525             :             }
    2526          38 :             break;
    2527          38 :         case AV_PIX_FMT_BGRA64LE:
    2528             : #if CONFIG_SWSCALE_ALPHA
    2529          38 :             if (c->needAlpha) {
    2530           0 :                 *yuv2packed1 = yuv2bgra64le_1_c;
    2531           0 :                 *yuv2packed2 = yuv2bgra64le_2_c;
    2532           0 :                 *yuv2packedX = yuv2bgra64le_X_c;
    2533             :             } else
    2534             : #endif /* CONFIG_SWSCALE_ALPHA */
    2535             :             {
    2536          38 :                 *yuv2packed1 = yuv2bgrx64le_1_c;
    2537          38 :                 *yuv2packed2 = yuv2bgrx64le_2_c;
    2538          38 :                 *yuv2packedX = yuv2bgrx64le_X_c;
    2539             :             }
    2540          38 :             break;
    2541          38 :         case AV_PIX_FMT_BGRA64BE:
    2542             : #if CONFIG_SWSCALE_ALPHA
    2543          38 :             if (c->needAlpha) {
    2544           0 :                 *yuv2packed1 = yuv2bgra64be_1_c;
    2545           0 :                 *yuv2packed2 = yuv2bgra64be_2_c;
    2546           0 :                 *yuv2packedX = yuv2bgra64be_X_c;
    2547             :             } else
    2548             : #endif /* CONFIG_SWSCALE_ALPHA */
    2549             :             {
    2550          38 :                 *yuv2packed1 = yuv2bgrx64be_1_c;
    2551          38 :                 *yuv2packed2 = yuv2bgrx64be_2_c;
    2552          38 :                 *yuv2packedX = yuv2bgrx64be_X_c;
    2553             :             }
    2554          38 :             break;
    2555         944 :         case AV_PIX_FMT_RGB48LE:
    2556         944 :             *yuv2packed1 = yuv2rgb48le_1_c;
    2557         944 :             *yuv2packed2 = yuv2rgb48le_2_c;
    2558         944 :             *yuv2packedX = yuv2rgb48le_X_c;
    2559         944 :             break;
    2560         130 :         case AV_PIX_FMT_RGB48BE:
    2561         130 :             *yuv2packed1 = yuv2rgb48be_1_c;
    2562         130 :             *yuv2packed2 = yuv2rgb48be_2_c;
    2563         130 :             *yuv2packedX = yuv2rgb48be_X_c;
    2564         130 :             break;
    2565          38 :         case AV_PIX_FMT_BGR48LE:
    2566          38 :             *yuv2packed1 = yuv2bgr48le_1_c;
    2567          38 :             *yuv2packed2 = yuv2bgr48le_2_c;
    2568          38 :             *yuv2packedX = yuv2bgr48le_X_c;
    2569          38 :             break;
    2570          38 :         case AV_PIX_FMT_BGR48BE:
    2571          38 :             *yuv2packed1 = yuv2bgr48be_1_c;
    2572          38 :             *yuv2packed2 = yuv2bgr48be_2_c;
    2573          38 :             *yuv2packedX = yuv2bgr48be_X_c;
    2574          38 :             break;
    2575        1524 :         case AV_PIX_FMT_RGB32:
    2576             :         case AV_PIX_FMT_BGR32:
    2577             : #if CONFIG_SMALL
    2578             :             *yuv2packed1 = yuv2rgb32_1_c;
    2579             :             *yuv2packed2 = yuv2rgb32_2_c;
    2580             :             *yuv2packedX = yuv2rgb32_X_c;
    2581             : #else
    2582             : #if CONFIG_SWSCALE_ALPHA
    2583        1524 :                 if (c->needAlpha) {
    2584           0 :                     *yuv2packed1 = yuv2rgba32_1_c;
    2585           0 :                     *yuv2packed2 = yuv2rgba32_2_c;
    2586           0 :                     *yuv2packedX = yuv2rgba32_X_c;
    2587             :                 } else
    2588             : #endif /* CONFIG_SWSCALE_ALPHA */
    2589             :                 {
    2590        1524 :                     *yuv2packed1 = yuv2rgbx32_1_c;
    2591        1524 :                     *yuv2packed2 = yuv2rgbx32_2_c;
    2592        1524 :                     *yuv2packedX = yuv2rgbx32_X_c;
    2593             :                 }
    2594             : #endif /* !CONFIG_SMALL */
    2595        1524 :             break;
    2596         301 :         case AV_PIX_FMT_RGB32_1:
    2597             :         case AV_PIX_FMT_BGR32_1:
    2598             : #if CONFIG_SMALL
    2599             :                 *yuv2packed1 = yuv2rgb32_1_1_c;
    2600             :                 *yuv2packed2 = yuv2rgb32_1_2_c;
    2601             :                 *yuv2packedX = yuv2rgb32_1_X_c;
    2602             : #else
    2603             : #if CONFIG_SWSCALE_ALPHA
    2604         301 :                 if (c->needAlpha) {
    2605           0 :                     *yuv2packed1 = yuv2rgba32_1_1_c;
    2606           0 :                     *yuv2packed2 = yuv2rgba32_1_2_c;
    2607           0 :                     *yuv2packedX = yuv2rgba32_1_X_c;
    2608             :                 } else
    2609             : #endif /* CONFIG_SWSCALE_ALPHA */
    2610             :                 {
    2611         301 :                     *yuv2packed1 = yuv2rgbx32_1_1_c;
    2612         301 :                     *yuv2packed2 = yuv2rgbx32_1_2_c;
    2613         301 :                     *yuv2packedX = yuv2rgbx32_1_X_c;
    2614             :                 }
    2615             : #endif /* !CONFIG_SMALL */
    2616         301 :                 break;
    2617        5799 :         case AV_PIX_FMT_RGB24:
    2618        5799 :             *yuv2packed1 = yuv2rgb24_1_c;
    2619        5799 :             *yuv2packed2 = yuv2rgb24_2_c;
    2620        5799 :             *yuv2packedX = yuv2rgb24_X_c;
    2621        5799 :             break;
    2622        1348 :         case AV_PIX_FMT_BGR24:
    2623        1348 :             *yuv2packed1 = yuv2bgr24_1_c;
    2624        1348 :             *yuv2packed2 = yuv2bgr24_2_c;
    2625        1348 :             *yuv2packedX = yuv2bgr24_X_c;
    2626        1348 :             break;
    2627         612 :         case AV_PIX_FMT_RGB565LE:
    2628             :         case AV_PIX_FMT_RGB565BE:
    2629             :         case AV_PIX_FMT_BGR565LE:
    2630             :         case AV_PIX_FMT_BGR565BE:
    2631         612 :             *yuv2packed1 = yuv2rgb16_1_c;
    2632         612 :             *yuv2packed2 = yuv2rgb16_2_c;
    2633         612 :             *yuv2packedX = yuv2rgb16_X_c;
    2634         612 :             break;
    2635        1319 :         case AV_PIX_FMT_RGB555LE:
    2636             :         case AV_PIX_FMT_RGB555BE:
    2637             :         case AV_PIX_FMT_BGR555LE:
    2638             :         case AV_PIX_FMT_BGR555BE:
    2639        1319 :             *yuv2packed1 = yuv2rgb15_1_c;
    2640        1319 :             *yuv2packed2 = yuv2rgb15_2_c;
    2641        1319 :             *yuv2packedX = yuv2rgb15_X_c;
    2642        1319 :             break;
    2643         164 :         case AV_PIX_FMT_RGB444LE:
    2644             :         case AV_PIX_FMT_RGB444BE:
    2645             :         case AV_PIX_FMT_BGR444LE:
    2646             :         case AV_PIX_FMT_BGR444BE:
    2647         164 :             *yuv2packed1 = yuv2rgb12_1_c;
    2648         164 :             *yuv2packed2 = yuv2rgb12_2_c;
    2649         164 :             *yuv2packedX = yuv2rgb12_X_c;
    2650         164 :             break;
    2651         177 :         case AV_PIX_FMT_RGB8:
    2652             :         case AV_PIX_FMT_BGR8:
    2653         177 :             *yuv2packed1 = yuv2rgb8_1_c;
    2654         177 :             *yuv2packed2 = yuv2rgb8_2_c;
    2655         177 :             *yuv2packedX = yuv2rgb8_X_c;
    2656         177 :             break;
    2657           0 :         case AV_PIX_FMT_RGB4:
    2658             :         case AV_PIX_FMT_BGR4:
    2659           0 :             *yuv2packed1 = yuv2rgb4_1_c;
    2660           0 :             *yuv2packed2 = yuv2rgb4_2_c;
    2661           0 :             *yuv2packedX = yuv2rgb4_X_c;
    2662           0 :             break;
    2663         103 :         case AV_PIX_FMT_RGB4_BYTE:
    2664             :         case AV_PIX_FMT_BGR4_BYTE:
    2665         103 :             *yuv2packed1 = yuv2rgb4b_1_c;
    2666         103 :             *yuv2packed2 = yuv2rgb4b_2_c;
    2667         103 :             *yuv2packedX = yuv2rgb4b_X_c;
    2668         103 :             break;
    2669             :         }
    2670             :     }
    2671       63464 :     switch (dstFormat) {
    2672         952 :     case AV_PIX_FMT_MONOWHITE:
    2673         952 :         *yuv2packed1 = yuv2monowhite_1_c;
    2674         952 :         *yuv2packed2 = yuv2monowhite_2_c;
    2675         952 :         *yuv2packedX = yuv2monowhite_X_c;
    2676         952 :         break;
    2677          64 :     case AV_PIX_FMT_MONOBLACK:
    2678          64 :         *yuv2packed1 = yuv2monoblack_1_c;
    2679          64 :         *yuv2packed2 = yuv2monoblack_2_c;
    2680          64 :         *yuv2packedX = yuv2monoblack_X_c;
    2681          64 :         break;
    2682          94 :     case AV_PIX_FMT_YUYV422:
    2683          94 :         *yuv2packed1 = yuv2yuyv422_1_c;
    2684          94 :         *yuv2packed2 = yuv2yuyv422_2_c;
    2685          94 :         *yuv2packedX = yuv2yuyv422_X_c;
    2686          94 :         break;
    2687          35 :     case AV_PIX_FMT_YVYU422:
    2688          35 :         *yuv2packed1 = yuv2yvyu422_1_c;
    2689          35 :         *yuv2packed2 = yuv2yvyu422_2_c;
    2690          35 :         *yuv2packedX = yuv2yvyu422_X_c;
    2691          35 :         break;
    2692         338 :     case AV_PIX_FMT_UYVY422:
    2693         338 :         *yuv2packed1 = yuv2uyvy422_1_c;
    2694         338 :         *yuv2packed2 = yuv2uyvy422_2_c;
    2695         338 :         *yuv2packedX = yuv2uyvy422_X_c;
    2696         338 :         break;
    2697          44 :     case AV_PIX_FMT_YA8:
    2698          44 :         *yuv2packed1 = yuv2ya8_1_c;
    2699          44 :         *yuv2packed2 = yuv2ya8_2_c;
    2700          44 :         *yuv2packedX = yuv2ya8_X_c;
    2701          44 :         break;
    2702          41 :     case AV_PIX_FMT_AYUV64LE:
    2703          41 :         *yuv2packedX = yuv2ayuv64le_X_c;
    2704          41 :         break;
    2705             :     }
    2706       63464 : }

Generated by: LCOV version 1.13