Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * This file is part of FFmpeg. | ||
3 | * | ||
4 | * FFmpeg is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU Lesser General Public | ||
6 | * License as published by the Free Software Foundation; either | ||
7 | * version 2.1 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * FFmpeg is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * Lesser General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU Lesser General Public | ||
15 | * License along with FFmpeg; if not, write to the Free Software | ||
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | |||
19 | #include "config.h" | ||
20 | #include "libavutil/attributes.h" | ||
21 | #include "libavutil/cpu.h" | ||
22 | #include "libavutil/x86/cpu.h" | ||
23 | #include "libavcodec/avcodec.h" | ||
24 | #include "libavcodec/idctdsp.h" | ||
25 | #include "idctdsp.h" | ||
26 | #include "simple_idct.h" | ||
27 | |||
/*
 * Coefficient permutation for the input of simple_idct_mmx.
 * It is copied entry by entry into the scantable permutation when
 * FF_IDCT_PERM_SIMPLE is requested (only compiled under ARCH_X86_32).
 * Every index 0x00..0x3F appears exactly once.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/*
 * Remapping of the low three bits of a coefficient index, used to build
 * the FF_IDCT_PERM_SSE2 permutation; the remaining index bits are kept.
 */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5,
    2, 6, 3, 7,
};
41 | |||
42 | 1742 | av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |
43 | enum idct_permutation_type perm_type) | ||
44 | { | ||
45 | int i; | ||
46 | |||
47 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1738 times.
|
1742 | switch (perm_type) { |
48 | #if ARCH_X86_32 | ||
49 | case FF_IDCT_PERM_SIMPLE: | ||
50 | for (i = 0; i < 64; i++) | ||
51 | idct_permutation[i] = simple_mmx_permutation[i]; | ||
52 | return 1; | ||
53 | #endif | ||
54 | 4 | case FF_IDCT_PERM_SSE2: | |
55 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 4 times.
|
260 | for (i = 0; i < 64; i++) |
56 | 256 | idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; | |
57 | 4 | return 1; | |
58 | } | ||
59 | |||
60 | 1738 | return 0; | |
61 | } | ||
62 | |||
63 | 1524 | av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, | |
64 | unsigned high_bit_depth) | ||
65 | { | ||
66 | 1524 | int cpu_flags = av_get_cpu_flags(); | |
67 | |||
68 | #if ARCH_X86_32 | ||
69 | if (EXTERNAL_MMX(cpu_flags)) { | ||
70 | if (!high_bit_depth && | ||
71 | avctx->lowres == 0 && | ||
72 | (avctx->idct_algo == FF_IDCT_AUTO || | ||
73 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || | ||
74 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { | ||
75 | c->idct = ff_simple_idct_mmx; | ||
76 | } | ||
77 | } | ||
78 | #endif | ||
79 | |||
80 |
2/2✓ Branch 0 taken 240 times.
✓ Branch 1 taken 1284 times.
|
1524 | if (EXTERNAL_SSE2(cpu_flags)) { |
81 | 240 | c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; | |
82 | 240 | c->put_pixels_clamped = ff_put_pixels_clamped_sse2; | |
83 | 240 | c->add_pixels_clamped = ff_add_pixels_clamped_sse2; | |
84 | |||
85 | #if ARCH_X86_32 | ||
86 | if (!high_bit_depth && | ||
87 | avctx->lowres == 0 && | ||
88 | (avctx->idct_algo == FF_IDCT_AUTO || | ||
89 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || | ||
90 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { | ||
91 | c->idct_put = ff_simple_idct_put_sse2; | ||
92 | c->idct_add = ff_simple_idct_add_sse2; | ||
93 | c->perm_type = FF_IDCT_PERM_SIMPLE; | ||
94 | } | ||
95 | #endif | ||
96 | |||
97 |
1/2✓ Branch 0 taken 240 times.
✗ Branch 1 not taken.
|
240 | if (ARCH_X86_64 && |
98 | 240 | !high_bit_depth && | |
99 |
1/2✓ Branch 0 taken 240 times.
✗ Branch 1 not taken.
|
240 | avctx->lowres == 0 && |
100 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 237 times.
|
240 | (avctx->idct_algo == FF_IDCT_AUTO || |
101 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
102 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEMMX || |
103 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
104 | 238 | c->idct = ff_simple_idct8_sse2; | |
105 | 238 | c->idct_put = ff_simple_idct8_put_sse2; | |
106 | 238 | c->idct_add = ff_simple_idct8_add_sse2; | |
107 | 238 | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
108 | } | ||
109 | } | ||
110 | |||
111 |
2/2✓ Branch 0 taken 1523 times.
✓ Branch 1 taken 1 times.
|
1524 | if (ARCH_X86_64 && avctx->lowres == 0) { |
112 |
3/4✓ Branch 0 taken 234 times.
✓ Branch 1 taken 1289 times.
✓ Branch 2 taken 234 times.
✗ Branch 3 not taken.
|
1523 | if (EXTERNAL_AVX(cpu_flags) && |
113 | 234 | !high_bit_depth && | |
114 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 231 times.
|
234 | (avctx->idct_algo == FF_IDCT_AUTO || |
115 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
116 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEMMX || |
117 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
118 | 232 | c->idct = ff_simple_idct8_avx; | |
119 | 232 | c->idct_put = ff_simple_idct8_put_avx; | |
120 | 232 | c->idct_add = ff_simple_idct8_add_avx; | |
121 | 232 | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
122 | } | ||
123 | |||
124 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 1498 times.
|
1523 | if (avctx->bits_per_raw_sample == 10 && |
125 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 2 times.
|
25 | avctx->codec_id != AV_CODEC_ID_MPEG4 && |
126 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | (avctx->idct_algo == FF_IDCT_AUTO || |
127 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
128 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
129 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (EXTERNAL_SSE2(cpu_flags)) { |
130 | ✗ | c->idct_put = ff_simple_idct10_put_sse2; | |
131 | ✗ | c->idct_add = NULL; | |
132 | ✗ | c->idct = ff_simple_idct10_sse2; | |
133 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
134 | |||
135 | } | ||
136 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (EXTERNAL_AVX(cpu_flags)) { |
137 | ✗ | c->idct_put = ff_simple_idct10_put_avx; | |
138 | ✗ | c->idct_add = NULL; | |
139 | ✗ | c->idct = ff_simple_idct10_avx; | |
140 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
141 | } | ||
142 | } | ||
143 | |||
144 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1519 times.
|
1523 | if (avctx->bits_per_raw_sample == 12 && |
145 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | (avctx->idct_algo == FF_IDCT_AUTO || |
146 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { |
147 | ✗ | if (EXTERNAL_SSE2(cpu_flags)) { | |
148 | ✗ | c->idct_put = ff_simple_idct12_put_sse2; | |
149 | ✗ | c->idct_add = NULL; | |
150 | ✗ | c->idct = ff_simple_idct12_sse2; | |
151 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
152 | } | ||
153 | ✗ | if (EXTERNAL_AVX(cpu_flags)) { | |
154 | ✗ | c->idct_put = ff_simple_idct12_put_avx; | |
155 | ✗ | c->idct_add = NULL; | |
156 | ✗ | c->idct = ff_simple_idct12_avx; | |
157 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
158 | } | ||
159 | } | ||
160 | } | ||
161 | 1524 | } | |
162 |