Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * This file is part of FFmpeg. | ||
3 | * | ||
4 | * FFmpeg is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU Lesser General Public | ||
6 | * License as published by the Free Software Foundation; either | ||
7 | * version 2.1 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * FFmpeg is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public | ||
15 | * License along with FFmpeg; if not, write to the Free Software | ||
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | |||
19 | #include "config.h" | ||
20 | |||
21 | #include "pixelutils.h" | ||
22 | #include "cpu.h" | ||
23 | |||
// Prototypes of the SAD routines installed by ff_pixelutils_sad_init_x86().
// Only declarations appear in this file; the definitions live elsewhere
// (presumably x86 assembly -- confirm against the build).
// Each routine takes two source pointers with their strides and returns int.
// Name infix, per the comments in the init function:
//   (none) = src1 unaligned, src2 unaligned
//   _u_    = src1 aligned,   src2 unaligned
//   _a_    = src1 aligned,   src2 aligned
24 | int ff_pixelutils_sad_8x8_mmxext(const uint8_t *src1, ptrdiff_t stride1, | ||
25 | const uint8_t *src2, ptrdiff_t stride2); | ||
26 | |||
27 | int ff_pixelutils_sad_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
28 | const uint8_t *src2, ptrdiff_t stride2); | ||
29 | int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
30 | const uint8_t *src2, ptrdiff_t stride2); | ||
31 | int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
32 | const uint8_t *src2, ptrdiff_t stride2); | ||
33 | |||
34 | int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
35 | const uint8_t *src2, ptrdiff_t stride2); | ||
36 | int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
37 | const uint8_t *src2, ptrdiff_t stride2); | ||
38 | int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, | ||
39 | const uint8_t *src2, ptrdiff_t stride2); | ||
40 | |||
41 | int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, | ||
42 | const uint8_t *src2, ptrdiff_t stride2); | ||
43 | int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, | ||
44 | const uint8_t *src2, ptrdiff_t stride2); | ||
45 | int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, | ||
46 | const uint8_t *src2, ptrdiff_t stride2); | ||
47 | |||
// Install x86-specific SAD routines into the caller's function table.
//
// sad:     table of SAD function pointers; only entries 2 (8x8), 3 (16x16)
//          and 4 (32x32) are ever written here, and only when the matching
//          CPU feature check passes. All other entries are left untouched.
// aligned: alignment mode selecting the routine variant:
//          0 = src1 unaligned, src2 unaligned
//          1 = src1 aligned,   src2 unaligned
//          2 = src1 aligned,   src2 aligned
//          Any other value matches no case, so entries 3 and 4 keep
//          whatever the caller put there.
//
// The AVX2 check runs after the SSE2 one, so when both pass the AVX2
// routine is the one left in entry 4.
48 | 62 | void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) | |
49 | { | ||
50 | 62 | int cpu_flags = av_get_cpu_flags(); | |
51 | |||
52 | // The best way to use SSE2 would be to do 2 SADs in parallel, | ||
53 | // but we'd have to modify the pixelutils API to return SIMD functions. | ||
54 | |||
55 | // It's probably not faster to shuffle data around | ||
56 | // to get two lines of 8 pixels into a single 16-byte register, | ||
57 | // so just use the MMXEXT 8x8 version even when SSE2 is available. | ||
58 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
|
62 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
59 | 60 | sad[2] = ff_pixelutils_sad_8x8_mmxext; | |
60 | } | ||
61 | |||
// SSE2, 16x16: choose the variant matching the requested alignment mode.
62 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
|
62 | if (EXTERNAL_SSE2(cpu_flags)) { |
63 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
60 | switch (aligned) { |
64 | 20 | case 0: sad[3] = ff_pixelutils_sad_16x16_sse2; break; // src1 unaligned, src2 unaligned | |
65 | 20 | case 1: sad[3] = ff_pixelutils_sad_u_16x16_sse2; break; // src1 aligned, src2 unaligned | |
66 | 20 | case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned | |
67 | } | ||
68 | } | ||
69 | |||
// SSE2, 32x32: same selection as above, written to entry 4.
70 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
|
62 | if (EXTERNAL_SSE2(cpu_flags)) { |
71 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
60 | switch (aligned) { |
72 | 20 | case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned | |
73 | 20 | case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned | |
74 | 20 | case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned | |
75 | } | ||
76 | } | ||
77 | |||
// AVX2, 32x32: checked last, so it overwrites the SSE2 choice in entry 4.
78 |
3/4✓ Branch 0 taken 60 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 60 times.
✗ Branch 3 not taken.
|
62 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
79 |
3/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
60 | switch (aligned) { |
80 | 20 | case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned | |
81 | 20 | case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned | |
82 | 20 | case 2: sad[4] = ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned | |
83 | } | ||
84 | } | ||
85 | 62 | } | |
86 |