Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com> | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/x86/cpu.h" | ||
22 | |||
23 | #include "libavfilter/colorspacedsp.h" | ||
24 | |||
25 | #define decl_yuv2yuv_fn(t) \ | ||
26 | void ff_yuv2yuv_##t##_sse2(uint8_t *yuv_out[3], const ptrdiff_t yuv_out_stride[3], \ | ||
27 | uint8_t *yuv_in[3], const ptrdiff_t yuv_in_stride[3], \ | ||
28 | int w, int h, const int16_t yuv2yuv_coeffs[3][3][8], \ | ||
29 | const int16_t yuv_offset[2][8]) | ||
30 | |||
31 | #define decl_yuv2yuv_fns(ss) \ | ||
32 | decl_yuv2yuv_fn(ss##p8to8); \ | ||
33 | decl_yuv2yuv_fn(ss##p10to8); \ | ||
34 | decl_yuv2yuv_fn(ss##p12to8); \ | ||
35 | decl_yuv2yuv_fn(ss##p8to10); \ | ||
36 | decl_yuv2yuv_fn(ss##p10to10); \ | ||
37 | decl_yuv2yuv_fn(ss##p12to10); \ | ||
38 | decl_yuv2yuv_fn(ss##p8to12); \ | ||
39 | decl_yuv2yuv_fn(ss##p10to12); \ | ||
40 | decl_yuv2yuv_fn(ss##p12to12) | ||
41 | |||
42 | decl_yuv2yuv_fns(420); | ||
43 | decl_yuv2yuv_fns(422); | ||
44 | decl_yuv2yuv_fns(444); | ||
45 | |||
46 | #define decl_yuv2rgb_fn(t) \ | ||
47 | void ff_yuv2rgb_##t##_sse2(int16_t *rgb_out[3], ptrdiff_t rgb_stride, \ | ||
48 | uint8_t *yuv_in[3], const ptrdiff_t yuv_stride[3], \ | ||
49 | int w, int h, const int16_t coeff[3][3][8], \ | ||
50 | const int16_t yuv_offset[8]) | ||
51 | |||
52 | #define decl_yuv2rgb_fns(ss) \ | ||
53 | decl_yuv2rgb_fn(ss##p8); \ | ||
54 | decl_yuv2rgb_fn(ss##p10); \ | ||
55 | decl_yuv2rgb_fn(ss##p12) | ||
56 | |||
57 | decl_yuv2rgb_fns(420); | ||
58 | decl_yuv2rgb_fns(422); | ||
59 | decl_yuv2rgb_fns(444); | ||
60 | |||
61 | #define decl_rgb2yuv_fn(t) \ | ||
62 | void ff_rgb2yuv_##t##_sse2(uint8_t *yuv_out[3], const ptrdiff_t yuv_stride[3], \ | ||
63 | int16_t *rgb_in[3], ptrdiff_t rgb_stride, \ | ||
64 | int w, int h, const int16_t coeff[3][3][8], \ | ||
65 | const int16_t yuv_offset[8]) | ||
66 | |||
67 | #define decl_rgb2yuv_fns(ss) \ | ||
68 | decl_rgb2yuv_fn(ss##p8); \ | ||
69 | decl_rgb2yuv_fn(ss##p10); \ | ||
70 | decl_rgb2yuv_fn(ss##p12) | ||
71 | |||
72 | decl_rgb2yuv_fns(420); | ||
73 | decl_rgb2yuv_fns(422); | ||
74 | decl_rgb2yuv_fns(444); | ||
75 | |||
76 | void ff_multiply3x3_sse2(int16_t *data[3], ptrdiff_t stride, int w, int h, | ||
77 | const int16_t coeff[3][3][8]); | ||
78 | |||
79 | 52 | void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) | |
80 | { | ||
81 | 52 | int cpu_flags = av_get_cpu_flags(); | |
82 | |||
83 | 2/2 ✓ Branch 0 taken 36 times. ✓ Branch 1 taken 16 times. | 52 | if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { |
84 | #define assign_yuv2yuv_fns(ss) \ | ||
85 | dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ | ||
86 | dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ | ||
87 | dsp->yuv2yuv[BPP_8 ][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p8to12_sse2; \ | ||
88 | dsp->yuv2yuv[BPP_10][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p10to8_sse2; \ | ||
89 | dsp->yuv2yuv[BPP_10][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p10to10_sse2; \ | ||
90 | dsp->yuv2yuv[BPP_10][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p10to12_sse2; \ | ||
91 | dsp->yuv2yuv[BPP_12][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p12to8_sse2; \ | ||
92 | dsp->yuv2yuv[BPP_12][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p12to10_sse2; \ | ||
93 | dsp->yuv2yuv[BPP_12][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p12to12_sse2 | ||
94 | |||
95 | 36 | assign_yuv2yuv_fns(420); | |
96 | 36 | assign_yuv2yuv_fns(422); | |
97 | 36 | assign_yuv2yuv_fns(444); | |
98 | |||
99 | #define assign_yuv2rgb_fns(ss) \ | ||
100 | dsp->yuv2rgb[BPP_8 ][SS_##ss] = ff_yuv2rgb_##ss##p8_sse2; \ | ||
101 | dsp->yuv2rgb[BPP_10][SS_##ss] = ff_yuv2rgb_##ss##p10_sse2; \ | ||
102 | dsp->yuv2rgb[BPP_12][SS_##ss] = ff_yuv2rgb_##ss##p12_sse2 | ||
103 | |||
104 | 36 | assign_yuv2rgb_fns(420); | |
105 | 36 | assign_yuv2rgb_fns(422); | |
106 | 36 | assign_yuv2rgb_fns(444); | |
107 | |||
108 | #define assign_rgb2yuv_fns(ss) \ | ||
109 | dsp->rgb2yuv[BPP_8 ][SS_##ss] = ff_rgb2yuv_##ss##p8_sse2; \ | ||
110 | dsp->rgb2yuv[BPP_10][SS_##ss] = ff_rgb2yuv_##ss##p10_sse2; \ | ||
111 | dsp->rgb2yuv[BPP_12][SS_##ss] = ff_rgb2yuv_##ss##p12_sse2 | ||
112 | |||
113 | 36 | assign_rgb2yuv_fns(420); | |
114 | 36 | assign_rgb2yuv_fns(422); | |
115 | 36 | assign_rgb2yuv_fns(444); | |
116 | |||
117 | 36 | dsp->multiply3x3 = ff_multiply3x3_sse2; | |
118 | } | ||
119 | 52 | } | |
120 |