| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com> | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/x86/cpu.h" | ||
| 22 | |||
| 23 | #include "libavfilter/colorspacedsp.h" | ||
| 24 | |||
| 25 | #define decl_yuv2yuv_fn(t) \ | ||
| 26 | void ff_yuv2yuv_##t##_sse2(uint8_t *yuv_out[3], const ptrdiff_t yuv_out_stride[3], \ | ||
| 27 | uint8_t *yuv_in[3], const ptrdiff_t yuv_in_stride[3], \ | ||
| 28 | int w, int h, const int16_t yuv2yuv_coeffs[3][3][8], \ | ||
| 29 | const int16_t yuv_offset[2][8]) | ||
| 30 | |||
| 31 | #define decl_yuv2yuv_fns(ss) \ | ||
| 32 | decl_yuv2yuv_fn(ss##p8to8); \ | ||
| 33 | decl_yuv2yuv_fn(ss##p10to8); \ | ||
| 34 | decl_yuv2yuv_fn(ss##p12to8); \ | ||
| 35 | decl_yuv2yuv_fn(ss##p8to10); \ | ||
| 36 | decl_yuv2yuv_fn(ss##p10to10); \ | ||
| 37 | decl_yuv2yuv_fn(ss##p12to10); \ | ||
| 38 | decl_yuv2yuv_fn(ss##p8to12); \ | ||
| 39 | decl_yuv2yuv_fn(ss##p10to12); \ | ||
| 40 | decl_yuv2yuv_fn(ss##p12to12) | ||
| 41 | |||
| 42 | decl_yuv2yuv_fns(420); | ||
| 43 | decl_yuv2yuv_fns(422); | ||
| 44 | decl_yuv2yuv_fns(444); | ||
| 45 | |||
| 46 | #define decl_yuv2rgb_fn(t) \ | ||
| 47 | void ff_yuv2rgb_##t##_sse2(int16_t *rgb_out[3], ptrdiff_t rgb_stride, \ | ||
| 48 | uint8_t *yuv_in[3], const ptrdiff_t yuv_stride[3], \ | ||
| 49 | int w, int h, const int16_t coeff[3][3][8], \ | ||
| 50 | const int16_t yuv_offset[8]) | ||
| 51 | |||
| 52 | #define decl_yuv2rgb_fns(ss) \ | ||
| 53 | decl_yuv2rgb_fn(ss##p8); \ | ||
| 54 | decl_yuv2rgb_fn(ss##p10); \ | ||
| 55 | decl_yuv2rgb_fn(ss##p12) | ||
| 56 | |||
| 57 | decl_yuv2rgb_fns(420); | ||
| 58 | decl_yuv2rgb_fns(422); | ||
| 59 | decl_yuv2rgb_fns(444); | ||
| 60 | |||
| 61 | #define decl_rgb2yuv_fn(t) \ | ||
| 62 | void ff_rgb2yuv_##t##_sse2(uint8_t *yuv_out[3], const ptrdiff_t yuv_stride[3], \ | ||
| 63 | int16_t *rgb_in[3], ptrdiff_t rgb_stride, \ | ||
| 64 | int w, int h, const int16_t coeff[3][3][8], \ | ||
| 65 | const int16_t yuv_offset[8]) | ||
| 66 | |||
| 67 | #define decl_rgb2yuv_fns(ss) \ | ||
| 68 | decl_rgb2yuv_fn(ss##p8); \ | ||
| 69 | decl_rgb2yuv_fn(ss##p10); \ | ||
| 70 | decl_rgb2yuv_fn(ss##p12) | ||
| 71 | |||
| 72 | decl_rgb2yuv_fns(420); | ||
| 73 | decl_rgb2yuv_fns(422); | ||
| 74 | decl_rgb2yuv_fns(444); | ||
| 75 | |||
| 76 | void ff_multiply3x3_sse2(int16_t *data[3], ptrdiff_t stride, int w, int h, | ||
| 77 | const int16_t coeff[3][3][8]); | ||
| 78 | |||
| 79 | 56 | void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) | |
| 80 | { | ||
| 81 | #if ARCH_X86_64 | ||
| 82 | 56 | int cpu_flags = av_get_cpu_flags(); | |
| 83 | |||
| 84 | 2/2 ✓ Branch 0 taken 40 times. ✓ Branch 1 taken 16 times. | 56 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 85 | #define assign_yuv2yuv_fns(ss) \ | ||
| 86 | dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ | ||
| 87 | dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ | ||
| 88 | dsp->yuv2yuv[BPP_8 ][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p8to12_sse2; \ | ||
| 89 | dsp->yuv2yuv[BPP_10][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p10to8_sse2; \ | ||
| 90 | dsp->yuv2yuv[BPP_10][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p10to10_sse2; \ | ||
| 91 | dsp->yuv2yuv[BPP_10][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p10to12_sse2; \ | ||
| 92 | dsp->yuv2yuv[BPP_12][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p12to8_sse2; \ | ||
| 93 | dsp->yuv2yuv[BPP_12][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p12to10_sse2; \ | ||
| 94 | dsp->yuv2yuv[BPP_12][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p12to12_sse2 | ||
| 95 | |||
| 96 | 40 | assign_yuv2yuv_fns(420); | |
| 97 | 40 | assign_yuv2yuv_fns(422); | |
| 98 | 40 | assign_yuv2yuv_fns(444); | |
| 99 | |||
| 100 | #define assign_yuv2rgb_fns(ss) \ | ||
| 101 | dsp->yuv2rgb[BPP_8 ][SS_##ss] = ff_yuv2rgb_##ss##p8_sse2; \ | ||
| 102 | dsp->yuv2rgb[BPP_10][SS_##ss] = ff_yuv2rgb_##ss##p10_sse2; \ | ||
| 103 | dsp->yuv2rgb[BPP_12][SS_##ss] = ff_yuv2rgb_##ss##p12_sse2 | ||
| 104 | |||
| 105 | 40 | assign_yuv2rgb_fns(420); | |
| 106 | 40 | assign_yuv2rgb_fns(422); | |
| 107 | 40 | assign_yuv2rgb_fns(444); | |
| 108 | |||
| 109 | #define assign_rgb2yuv_fns(ss) \ | ||
| 110 | dsp->rgb2yuv[BPP_8 ][SS_##ss] = ff_rgb2yuv_##ss##p8_sse2; \ | ||
| 111 | dsp->rgb2yuv[BPP_10][SS_##ss] = ff_rgb2yuv_##ss##p10_sse2; \ | ||
| 112 | dsp->rgb2yuv[BPP_12][SS_##ss] = ff_rgb2yuv_##ss##p12_sse2 | ||
| 113 | |||
| 114 | 40 | assign_rgb2yuv_fns(420); | |
| 115 | 40 | assign_rgb2yuv_fns(422); | |
| 116 | 40 | assign_rgb2yuv_fns(444); | |
| 117 | |||
| 118 | 40 | dsp->multiply3x3 = ff_multiply3x3_sse2; | |
| 119 | } | ||
| 120 | #endif | ||
| 121 | 56 | } | |
| 122 |