FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavfilter/x86/vf_spp.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 4 34 11.8%
Functions: 1 4 25.0%
Branches: 1 11 9.1%

Line Branch Exec Source
1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21
22 #include "libavutil/attributes.h"
23 #include "libavutil/cpu.h"
24 #include "libavutil/crc.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavfilter/vf_spp.h"
27
28 #if HAVE_MMX_INLINE
/**
 * Hard-threshold requantization of one 8x8 block of 16-bit coefficients (MMX).
 *
 * With threshold1 = qp * 16 - 1 (bias is currently always 0), every
 * coefficient x handled by the asm is replaced by
 *   - 0              if |x| <= threshold1
 *   - (x + 4) >> 3   otherwise
 * (assuming the intermediate 16-bit sums do not overflow). dst[0] is then
 * overwritten in C with (src[0] + 4) >> 3 regardless of the threshold.
 *
 * @param dst         receives all 64 output coefficients
 * @param src         64 input coefficients
 * @param qp          quantizer used to derive the threshold
 * @param permutation not read here: the output ordering is hard-wired in the
 *                    punpck* interleaving and the operand offsets below. The
 *                    init function in this file only installs this routine
 *                    when the CRC of the IDCT permutation matches a fixed
 *                    value.
 *
 * The MMX state is left dirty (no emms is issued here).
 */
static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

/*
 * Thresholds four quadwords (4 x 4 coefficients) and stores them interleaved.
 * Expects mm4 = threshold1 + 1, mm5 = threshold1 + 5, mm6 = threshold1 - 4,
 * each broadcast to all four 16-bit lanes. The psubw / paddusw / paddw /
 * psubusw chain uses unsigned saturation to zero the small values and leaves
 * x + 4 in the lanes that survive.
 */
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0 \n"                                         \
    "movq " #src1 ", %%mm1 \n"                                         \
    "movq " #src2 ", %%mm2 \n"                                         \
    "movq " #src3 ", %%mm3 \n"                                         \
    "psubw %%mm4, %%mm0 \n"                                            \
    "psubw %%mm4, %%mm1 \n"                                            \
    "psubw %%mm4, %%mm2 \n"                                            \
    "psubw %%mm4, %%mm3 \n"                                            \
    "paddusw %%mm5, %%mm0 \n"                                          \
    "paddusw %%mm5, %%mm1 \n"                                          \
    "paddusw %%mm5, %%mm2 \n"                                          \
    "paddusw %%mm5, %%mm3 \n"                                          \
    "paddw %%mm6, %%mm0 \n"                                            \
    "paddw %%mm6, %%mm1 \n"                                            \
    "paddw %%mm6, %%mm2 \n"                                            \
    "paddw %%mm6, %%mm3 \n"                                            \
    "psubusw %%mm6, %%mm0 \n"                                          \
    "psubusw %%mm6, %%mm1 \n"                                          \
    "psubusw %%mm6, %%mm2 \n"                                          \
    "psubusw %%mm6, %%mm3 \n"                                          \
    "psraw $3, %%mm0 \n"                                               \
    "psraw $3, %%mm1 \n"                                               \
    "psraw $3, %%mm2 \n"                                               \
    "psraw $3, %%mm3 \n"                                               \
                                                                        \
    "movq %%mm0, %%mm7 \n"                                             \
    "punpcklwd %%mm2, %%mm0 \n" /*A*/                                  \
    "punpckhwd %%mm2, %%mm7 \n" /*C*/                                  \
    "movq %%mm1, %%mm2 \n"                                             \
    "punpcklwd %%mm3, %%mm1 \n" /*B*/                                  \
    "punpckhwd %%mm3, %%mm2 \n" /*D*/                                  \
    "movq %%mm0, %%mm3 \n"                                             \
    "punpcklwd %%mm1, %%mm0 \n" /*A*/                                  \
    "punpckhwd %%mm7, %%mm3 \n" /*C*/                                  \
    "punpcklwd %%mm2, %%mm7 \n" /*B*/                                  \
    "punpckhwd %%mm2, %%mm1 \n" /*D*/                                  \
                                                                        \
    "movq %%mm0, " #dst0 " \n"                                         \
    "movq %%mm7, " #dst1 " \n"                                         \
    "movq %%mm3, " #dst2 " \n"                                         \
    "movq %%mm1, " #dst3 " \n"

    __asm__ volatile(
        /* Broadcast each 32-bit constant to four 16-bit lanes. */
        "movd %2, %%mm4 \n"
        "movd %3, %%mm5 \n"
        "movd %4, %%mm6 \n"
        "packssdw %%mm4, %%mm4 \n"
        "packssdw %%mm5, %%mm5 \n"
        "packssdw %%mm6, %%mm6 \n"
        "packssdw %%mm4, %%mm4 \n"
        "packssdw %%mm5, %%mm5 \n"
        "packssdw %%mm6, %%mm6 \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : /* no register outputs; dst is written through memory */
        /* "rm" rather than "g": movd cannot take an immediate source. */
        : "r" (src), "r" (dst),
          "rm" (threshold1 + 1), "rm" (threshold1 + 5), "rm" (threshold1 - 4) //FIXME maybe more accurate than needed?
        /* The asm loads from src[] and stores to dst[] behind the compiler's
         * back; without this clobber the dst[0] store below could be moved
         * ahead of the asm and then be overwritten by it. */
        : "memory"
    );
    /* The DC coefficient is never thresholded. */
    dst[0] = (src[0] + 4) >> 3;
}
98
/**
 * Soft-threshold requantization of one 8x8 block of 16-bit coefficients (MMX).
 *
 * With threshold1 = qp * 16 - 1 (bias is currently always 0), every
 * coefficient handled by the asm has its magnitude reduced by threshold1
 * (clamped so it never crosses zero), after which 4 is added with signed
 * saturation and the result is shifted right arithmetically by 3.
 * Coefficients with |x| <= threshold1 therefore come out as 0.
 * dst[0] is then overwritten in C with (src[0] + 4) >> 3.
 *
 * @param dst         receives all 64 output coefficients
 * @param src         64 input coefficients
 * @param qp          quantizer used to derive the threshold
 * @param permutation not read here: the output ordering is hard-wired in the
 *                    punpck* interleaving and the operand offsets below. The
 *                    init function in this file only installs this routine
 *                    when the CRC of the IDCT permutation matches a fixed
 *                    value.
 *
 * The MMX state is left dirty (no emms is issued here).
 */
static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp*((1<<4) - bias) - 1;

/*
 * Soft-thresholds four quadwords (4 x 4 coefficients) and stores them
 * interleaved. Expects mm4 = threshold1 and mm5 = 4, each broadcast to all
 * four 16-bit lanes. Per lane: build a sign mask (pcmpgtw against zero), xor
 * with it to flip negative values to non-negative, subtract the threshold with
 * unsigned saturation, then xor with the mask again to restore the sign.
 */
#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0 \n"                                         \
    "movq " #src1 ", %%mm1 \n"                                         \
    "pxor %%mm6, %%mm6 \n"                                             \
    "pxor %%mm7, %%mm7 \n"                                             \
    "pcmpgtw %%mm0, %%mm6 \n"                                          \
    "pcmpgtw %%mm1, %%mm7 \n"                                          \
    "pxor %%mm6, %%mm0 \n"                                             \
    "pxor %%mm7, %%mm1 \n"                                             \
    "psubusw %%mm4, %%mm0 \n"                                          \
    "psubusw %%mm4, %%mm1 \n"                                          \
    "pxor %%mm6, %%mm0 \n"                                             \
    "pxor %%mm7, %%mm1 \n"                                             \
    "movq " #src2 ", %%mm2 \n"                                         \
    "movq " #src3 ", %%mm3 \n"                                         \
    "pxor %%mm6, %%mm6 \n"                                             \
    "pxor %%mm7, %%mm7 \n"                                             \
    "pcmpgtw %%mm2, %%mm6 \n"                                          \
    "pcmpgtw %%mm3, %%mm7 \n"                                          \
    "pxor %%mm6, %%mm2 \n"                                             \
    "pxor %%mm7, %%mm3 \n"                                             \
    "psubusw %%mm4, %%mm2 \n"                                          \
    "psubusw %%mm4, %%mm3 \n"                                          \
    "pxor %%mm6, %%mm2 \n"                                             \
    "pxor %%mm7, %%mm3 \n"                                             \
                                                                        \
    "paddsw %%mm5, %%mm0 \n"                                           \
    "paddsw %%mm5, %%mm1 \n"                                           \
    "paddsw %%mm5, %%mm2 \n"                                           \
    "paddsw %%mm5, %%mm3 \n"                                           \
    "psraw $3, %%mm0 \n"                                               \
    "psraw $3, %%mm1 \n"                                               \
    "psraw $3, %%mm2 \n"                                               \
    "psraw $3, %%mm3 \n"                                               \
                                                                        \
    "movq %%mm0, %%mm7 \n"                                             \
    "punpcklwd %%mm2, %%mm0 \n" /*A*/                                  \
    "punpckhwd %%mm2, %%mm7 \n" /*C*/                                  \
    "movq %%mm1, %%mm2 \n"                                             \
    "punpcklwd %%mm3, %%mm1 \n" /*B*/                                  \
    "punpckhwd %%mm3, %%mm2 \n" /*D*/                                  \
    "movq %%mm0, %%mm3 \n"                                             \
    "punpcklwd %%mm1, %%mm0 \n" /*A*/                                  \
    "punpckhwd %%mm7, %%mm3 \n" /*C*/                                  \
    "punpcklwd %%mm2, %%mm7 \n" /*B*/                                  \
    "punpckhwd %%mm2, %%mm1 \n" /*D*/                                  \
                                                                        \
    "movq %%mm0, " #dst0 " \n"                                         \
    "movq %%mm7, " #dst1 " \n"                                         \
    "movq %%mm3, " #dst2 " \n"                                         \
    "movq %%mm1, " #dst3 " \n"

    __asm__ volatile(
        /* Broadcast each 32-bit constant to four 16-bit lanes. */
        "movd %2, %%mm4 \n"
        "movd %3, %%mm5 \n"
        "packssdw %%mm4, %%mm4 \n"
        "packssdw %%mm5, %%mm5 \n"
        "packssdw %%mm4, %%mm4 \n"
        "packssdw %%mm5, %%mm5 \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : /* no register outputs; dst is written through memory */
        /* "rm" rather than "g": movd cannot take an immediate source. */
        : "r" (src), "r" (dst), "rm" (threshold1), "rm" (4) //FIXME maybe more accurate than needed?
        /* The asm loads from src[] and stores to dst[] behind the compiler's
         * back; without this clobber the dst[0] store below could be moved
         * ahead of the asm and then be overwritten by it. */
        : "memory"
    );

    /* The DC coefficient is never thresholded. */
    dst[0] = (src[0] + 4) >> 3;
}
176
177 static void store_slice_mmx(uint8_t *dst, const int16_t *src,
178 int dst_stride, int src_stride,
179 int width, int height, int log2_scale,
180 const uint8_t dither[8][8])
181 {
182 int y;
183
184 for (y = 0; y < height; y++) {
185 uint8_t *dst1 = dst;
186 const int16_t *src1 = src;
187 __asm__ volatile(
188 "movq (%3), %%mm3 \n"
189 "movq (%3), %%mm4 \n"
190 "movd %4, %%mm2 \n"
191 "pxor %%mm0, %%mm0 \n"
192 "punpcklbw %%mm0, %%mm3 \n"
193 "punpckhbw %%mm0, %%mm4 \n"
194 "psraw %%mm2, %%mm3 \n"
195 "psraw %%mm2, %%mm4 \n"
196 "movd %5, %%mm2 \n"
197 "1: \n"
198 "movq (%0), %%mm0 \n"
199 "movq 8(%0), %%mm1 \n"
200 "paddw %%mm3, %%mm0 \n"
201 "paddw %%mm4, %%mm1 \n"
202 "psraw %%mm2, %%mm0 \n"
203 "psraw %%mm2, %%mm1 \n"
204 "packuswb %%mm1, %%mm0 \n"
205 "movq %%mm0, (%1) \n"
206 "add $16, %0 \n"
207 "add $8, %1 \n"
208 "cmp %2, %1 \n"
209 " jb 1b \n"
210 : "+r" (src1), "+r"(dst1)
211 : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(MAX_LEVEL - log2_scale)
212 );
213 src += src_stride;
214 dst += dst_stride;
215 }
216 }
217
218 #endif /* HAVE_MMX_INLINE */
219
220 1 av_cold void ff_spp_init_x86(SPPContext *s)
221 {
222 #if HAVE_MMX_INLINE
223 1 int cpu_flags = av_get_cpu_flags();
224
225
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (cpu_flags & AV_CPU_FLAG_MMX) {
226 static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4;
227 uint32_t idct_perm_crc =
228 av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
229 s->dct->idct_permutation,
230 sizeof(s->dct->idct_permutation));
231 int64_t bps;
232 s->store_slice = store_slice_mmx;
233 av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
234 if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) {
235 switch (s->mode) {
236 case 0: s->requantize = hardthresh_mmx; break;
237 case 1: s->requantize = softthresh_mmx; break;
238 }
239 }
240 }
241 #endif
242 1 }
243