Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * software RGB to RGB converter | ||
3 | * pluralize by software PAL8 to RGB converter | ||
4 | * software YUV to YUV converter | ||
5 | * software YUV to RGB converter | ||
6 | * Written by Nick Kurshev. | ||
7 | * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) | ||
8 | * | ||
9 | * This file is part of FFmpeg. | ||
10 | * | ||
11 | * FFmpeg is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU Lesser General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2.1 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * FFmpeg is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * Lesser General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU Lesser General Public | ||
22 | * License along with FFmpeg; if not, write to the Free Software | ||
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
24 | */ | ||
25 | |||
26 | #include <stdint.h> | ||
27 | |||
28 | #include "config.h" | ||
29 | #include "libavutil/attributes.h" | ||
30 | #include "libavutil/x86/cpu.h" | ||
31 | #include "libavutil/cpu.h" | ||
32 | #include "libavutil/bswap.h" | ||
33 | #include "libavutil/mem_internal.h" | ||
34 | |||
35 | #include "libswscale/rgb2rgb.h" | ||
36 | #include "libswscale/swscale.h" | ||
37 | #include "libswscale/swscale_internal.h" | ||
38 | |||
39 | #if HAVE_INLINE_ASM | ||
40 | |||
41 | DECLARE_ASM_CONST(8, uint64_t, mmx_ff) = 0x00000000000000FFULL; /* only the lowest byte set */ | ||
42 | DECLARE_ASM_CONST(8, uint64_t, mmx_null) = 0x0000000000000000ULL; /* all bits clear */ | ||
43 | DECLARE_ASM_CONST(8, uint64_t, mask32a) = 0xFF000000FF000000ULL; /* top byte of each 32-bit half */ | ||
44 | DECLARE_ASM_CONST(8, uint64_t, mask3216br) = 0x00F800F800F800F8ULL; /* bits 3-7 of each 16-bit lane */ | ||
45 | DECLARE_ASM_CONST(8, uint64_t, mask3216g) = 0x0000FC000000FC00ULL; /* bits 10-15 of each 32-bit half */ | ||
46 | DECLARE_ASM_CONST(8, uint64_t, mask3215g) = 0x0000F8000000F800ULL; /* bits 11-15 of each 32-bit half */ | ||
47 | DECLARE_ASM_CONST(8, uint64_t, mul3216) = 0x2000000420000004ULL; /* 16-bit lanes 0x0004 (low) and 0x2000 (high) in each 32-bit half */ | ||
48 | DECLARE_ASM_CONST(8, uint64_t, mul3215) = 0x2000000820000008ULL; /* 16-bit lanes 0x0008 (low) and 0x2000 (high) in each 32-bit half */ | ||
49 | DECLARE_ASM_CONST(8, uint64_t, mask24b) = 0x00FF0000FF0000FFULL; /* bytes 0, 3 and 6 */ | ||
50 | DECLARE_ASM_CONST(8, uint64_t, mask24g) = 0xFF0000FF0000FF00ULL; /* bytes 1, 4 and 7 */ | ||
51 | DECLARE_ASM_CONST(8, uint64_t, mask24r) = 0x0000FF0000FF0000ULL; /* bytes 2 and 5 */ | ||
52 | DECLARE_ASM_CONST(8, uint64_t, mask24l) = 0x0000000000FFFFFFULL; /* low 24 bits */ | ||
53 | DECLARE_ASM_CONST(8, uint64_t, mask24h) = 0x0000FFFFFF000000ULL; /* bits 24-47 */ | ||
54 | DECLARE_ASM_CONST(8, uint64_t, mask15b) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ | ||
55 | DECLARE_ASM_CONST(8, uint64_t, mask15rg) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ | ||
56 | DECLARE_ASM_CONST(8, uint64_t, mask15s) = 0xFFE0FFE0FFE0FFE0ULL; /* bits 5-15 of each 16-bit lane */ | ||
57 | DECLARE_ASM_CONST(8, uint64_t, mask15g) = 0x03E003E003E003E0ULL; /* bits 5-9 of each 16-bit lane */ | ||
58 | DECLARE_ASM_CONST(8, uint64_t, mask15r) = 0x7C007C007C007C00ULL; /* bits 10-14 of each 16-bit lane */ | ||
59 | #define mask16b mask15b /* alias: same pattern, bits 0-4 of each 16-bit lane */ | ||
60 | DECLARE_ASM_CONST(8, uint64_t, mask16g) = 0x07E007E007E007E0ULL; /* bits 5-10 of each 16-bit lane */ | ||
61 | DECLARE_ASM_CONST(8, uint64_t, mask16r) = 0xF800F800F800F800ULL; /* bits 11-15 of each 16-bit lane */ | ||
62 | DECLARE_ASM_CONST(8, uint64_t, red_16mask) = 0x0000f8000000f800ULL; /* bits 11-15 of each 32-bit half */ | ||
63 | DECLARE_ASM_CONST(8, uint64_t, green_16mask) = 0x000007e0000007e0ULL; /* bits 5-10 of each 32-bit half */ | ||
64 | DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; /* bits 0-4 of each 32-bit half */ | ||
65 | DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; /* bits 10-14 of each 32-bit half */ | ||
66 | DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; /* bits 5-9 of each 32-bit half */ | ||
67 | DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; /* bits 0-4 of each 32-bit half */ | ||
68 | DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; /* 0x4200 in every 16-bit lane; NOTE(review): not referenced in this file, presumably used by rgb2rgb_template.c -- confirm */ | ||
69 | DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; /* 0x0210 in every 16-bit lane */ | ||
70 | DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; /* 0x2080 in every 16-bit lane */ | ||
71 | |||
72 | DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2YOffset); /* extern declarations only: these three are defined outside this file */ | ||
73 | DECLARE_ALIGNED(8, extern const uint64_t, ff_w1111); | ||
74 | DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); | ||
75 | |||
76 | #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) /* integer weights: coefficient scaled by 2^RGB2YUV_SHIFT, biased by +0.5, then truncated by the int cast */ | ||
77 | #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) /* note: the +0.5 bias is applied to the negative weights as well */ | ||
78 | #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) /* NOTE(review): names presumably read as <source channel B/G/R><target plane Y/U/V> -- confirm against the template */ | ||
79 | #define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) | ||
80 | #define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) | ||
81 | #define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) | ||
82 | #define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) | ||
83 | #define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) | ||
84 | #define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) | ||
85 | |||
86 | // Note: this file instantiates the template below for MMXEXT, SSE2 and AVX only. | ||
87 | |||
88 | #define COMPILE_TEMPLATE_SSE2 0 /* both switches are 0 for the first (MMXEXT) include of the template */ | ||
89 | #define COMPILE_TEMPLATE_AVX 0 | ||
90 | |||
91 | // MMXEXT versions: RENAME() appends _mmxext to each name it is applied to | ||
92 | #undef RENAME | ||
93 | #define RENAME(a) a ## _mmxext | ||
94 | #include "rgb2rgb_template.c" | ||
95 | |||
96 | // SSE2 versions: same template included again with COMPILE_TEMPLATE_SSE2 set to 1 and the _sse2 suffix | ||
97 | #undef RENAME | ||
98 | #undef COMPILE_TEMPLATE_SSE2 | ||
99 | #define COMPILE_TEMPLATE_SSE2 1 | ||
100 | #define RENAME(a) a ## _sse2 | ||
101 | #include "rgb2rgb_template.c" | ||
102 | |||
103 | // AVX versions: COMPILE_TEMPLATE_SSE2 is still 1 here; COMPILE_TEMPLATE_AVX is additionally set to 1, suffix _avx | ||
104 | #undef RENAME | ||
105 | #undef COMPILE_TEMPLATE_AVX | ||
106 | #define COMPILE_TEMPLATE_AVX 1 | ||
107 | #define RENAME(a) a ## _avx | ||
108 | #include "rgb2rgb_template.c" | ||
109 | |||
110 | /* | ||
111 | RGB15->RGB16 original by Strepto/Astral | ||
112 | ported to gcc & bugfixed : A'rpi | ||
113 | MMXEXT, 3DNOW optimization by Nick Kurshev | ||
114 | 32-bit C version, and and&add trick by Michael Niedermayer | ||
115 | */ | ||
116 | |||
117 | #endif /* HAVE_INLINE_ASM */ | ||
118 | |||
119 | void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst, int src_size); /* prototypes only: none of these functions is defined in this file (presumably external assembly -- confirm) */ | ||
120 | void ff_shuffle_bytes_2103_ssse3(const uint8_t *src, uint8_t *dst, int src_size); | ||
121 | void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size); | ||
122 | void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size); | ||
123 | void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size); | ||
124 | void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size); | ||
125 | |||
126 | #if ARCH_X86_64 /* the AVX2 shuffles and both uyvytoyuv422 variants are declared, and assigned in rgb2rgb_init_x86(), only when ARCH_X86_64 is set */ | ||
127 | void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size); | ||
128 | void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size); | ||
129 | void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size); | ||
130 | void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size); | ||
131 | void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size); | ||
132 | |||
133 | void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | ||
134 | const uint8_t *src, int width, int height, | ||
135 | int lumStride, int chromStride, int srcStride); | ||
136 | void ff_uyvytoyuv422_avx(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | ||
137 | const uint8_t *src, int width, int height, | ||
138 | int lumStride, int chromStride, int srcStride); | ||
139 | #endif | ||
140 | |||
141 | 3397 | av_cold void rgb2rgb_init_x86(void) /* Picks x86 rgb2rgb implementations from the runtime CPU flags; checks run in order, so later ones overwrite assignments made by earlier ones. */ | |
142 | { | ||
143 | 3397 | int cpu_flags = av_get_cpu_flags(); | |
144 | |||
145 | #if HAVE_INLINE_ASM | ||
146 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 3381 times.
|
3397 | if (INLINE_MMXEXT(cpu_flags)) /* the three rgb2rgb_init_* calls are presumably generated by the rgb2rgb_template.c includes via RENAME -- confirm */ |
147 | 16 | rgb2rgb_init_mmxext(); | |
148 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 3383 times.
|
3397 | if (INLINE_SSE2(cpu_flags)) |
149 | 14 | rgb2rgb_init_sse2(); | |
150 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 3389 times.
|
3397 | if (INLINE_AVX(cpu_flags)) |
151 | 8 | rgb2rgb_init_avx(); | |
152 | #endif /* HAVE_INLINE_ASM */ | ||
153 | |||
154 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 3381 times.
|
3397 | if (EXTERNAL_MMXEXT(cpu_flags)) { /* may be replaced again by the SSSE3 and AVX2 blocks below */ |
155 | 16 | shuffle_bytes_2103 = ff_shuffle_bytes_2103_mmxext; | |
156 | } | ||
157 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 3383 times.
|
3397 | if (EXTERNAL_SSE2(cpu_flags)) { /* body is empty unless ARCH_X86_64 is set */ |
158 | #if ARCH_X86_64 | ||
159 | 14 | uyvytoyuv422 = ff_uyvytoyuv422_sse2; | |
160 | #endif | ||
161 | } | ||
162 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 3385 times.
|
3397 | if (EXTERNAL_SSSE3(cpu_flags)) { /* replaces the MMXEXT shuffle_bytes_2103 and sets the other four shuffles */ |
163 | 12 | shuffle_bytes_0321 = ff_shuffle_bytes_0321_ssse3; | |
164 | 12 | shuffle_bytes_2103 = ff_shuffle_bytes_2103_ssse3; | |
165 | 12 | shuffle_bytes_1230 = ff_shuffle_bytes_1230_ssse3; | |
166 | 12 | shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3; | |
167 | 12 | shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3; | |
168 | } | ||
169 | #if ARCH_X86_64 | ||
170 |
3/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3391 times.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
3397 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { /* replaces all five shuffles set by the SSSE3 block */ |
171 | 6 | shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2; | |
172 | 6 | shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2; | |
173 | 6 | shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2; | |
174 | 6 | shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2; | |
175 | 6 | shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2; | |
176 | } | ||
177 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 3389 times.
|
3397 | if (EXTERNAL_AVX(cpu_flags)) { /* replaces the SSE2 uyvytoyuv422 chosen above */ |
178 | 8 | uyvytoyuv422 = ff_uyvytoyuv422_avx; | |
179 | } | ||
180 | #endif | ||
181 | 3397 | } | |
182 |