| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * This file is part of FFmpeg. | ||
| 3 | * | ||
| 4 | * FFmpeg is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU Lesser General Public | ||
| 6 | * License as published by the Free Software Foundation; either | ||
| 7 | * version 2.1 of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 12 | * Lesser General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU Lesser General Public | ||
| 15 | * License along with FFmpeg; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include "config.h" | ||
| 20 | #include "libavutil/attributes.h" | ||
| 21 | #include "libavutil/cpu.h" | ||
| 22 | #include "libavutil/x86/cpu.h" | ||
| 23 | #include "libavcodec/avcodec.h" | ||
| 24 | #include "libavcodec/idctdsp.h" | ||
| 25 | #include "idctdsp.h" | ||
| 26 | #include "simple_idct.h" | ||
| 27 | |||
| 28 | /* Input permutation for the simple_idct_mmx. The table is copied entry for entry into idct_permutation[] when FF_IDCT_PERM_SIMPLE is requested; that copy is only compiled when ARCH_X86_32 is set. */ | ||
| 29 | static const uint8_t simple_mmx_permutation[64] = { | ||
| 30 | 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | ||
| 31 | 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | ||
| 32 | 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | ||
| 33 | 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | ||
| 34 | 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | ||
| 35 | 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | ||
| 36 | 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | ||
| 37 | 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | ||
| 38 | }; | ||
| 39 | |||
| 40 | static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 }; /* indexed by the low three bits of a position; the result replaces those bits when the FF_IDCT_PERM_SSE2 table is built */ | ||
| 41 | |||
| 42 | 1719 | av_cold int ff_init_scantable_permutation_x86(uint8_t *idct_permutation, | |
| 43 | enum idct_permutation_type perm_type) | ||
| 44 | { | ||
| 45 | int i; | ||
| 46 | |||
| 47 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1717 times.
|
1719 | switch (perm_type) { |
| 48 | #if ARCH_X86_32 | ||
| 49 | case FF_IDCT_PERM_SIMPLE: | ||
| 50 | for (i = 0; i < 64; i++) | ||
| 51 | idct_permutation[i] = simple_mmx_permutation[i]; | ||
| 52 | return 1; | ||
| 53 | #endif | ||
| 54 | 2 | case FF_IDCT_PERM_SSE2: | |
| 55 |
2/2✓ Branch 0 taken 128 times.
✓ Branch 1 taken 2 times.
|
130 | for (i = 0; i < 64; i++) |
| 56 | 128 | idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7]; | |
| 57 | 2 | return 1; | |
| 58 | } | ||
| 59 | |||
| 60 | 1717 | return 0; | |
| 61 | } | ||
| 62 | |||
| 63 | 1575 | av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, | |
| 64 | unsigned high_bit_depth) | ||
| 65 | { | ||
| 66 | 1575 | int cpu_flags = av_get_cpu_flags(); | |
| 67 | |||
| 68 | #if ARCH_X86_32 | ||
| 69 | if (EXTERNAL_MMX(cpu_flags)) { | ||
| 70 | if (!high_bit_depth && | ||
| 71 | avctx->lowres == 0 && | ||
| 72 | (avctx->idct_algo == FF_IDCT_AUTO || | ||
| 73 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || | ||
| 74 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { | ||
| 75 | c->idct = ff_simple_idct_mmx; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | #endif | ||
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 1321 times.
|
1575 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 81 | 254 | c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; | |
| 82 | 254 | c->put_pixels_clamped = ff_put_pixels_clamped_sse2; | |
| 83 | 254 | c->add_pixels_clamped = ff_add_pixels_clamped_sse2; | |
| 84 | |||
| 85 | #if ARCH_X86_32 | ||
| 86 | if (!high_bit_depth && | ||
| 87 | avctx->lowres == 0 && | ||
| 88 | (avctx->idct_algo == FF_IDCT_AUTO || | ||
| 89 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || | ||
| 90 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { | ||
| 91 | c->idct_put = ff_simple_idct_put_sse2; | ||
| 92 | c->idct_add = ff_simple_idct_add_sse2; | ||
| 93 | c->perm_type = FF_IDCT_PERM_SIMPLE; | ||
| 94 | } | ||
| 95 | #endif | ||
| 96 | |||
| 97 |
1/2✓ Branch 0 taken 254 times.
✗ Branch 1 not taken.
|
254 | if (ARCH_X86_64 && |
| 98 | 254 | !high_bit_depth && | |
| 99 |
1/2✓ Branch 0 taken 254 times.
✗ Branch 1 not taken.
|
254 | avctx->lowres == 0 && |
| 100 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 251 times.
|
254 | (avctx->idct_algo == FF_IDCT_AUTO || |
| 101 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
| 102 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEMMX || |
| 103 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
| 104 | 252 | c->idct = ff_simple_idct8_sse2; | |
| 105 | 252 | c->idct_put = ff_simple_idct8_put_sse2; | |
| 106 | 252 | c->idct_add = ff_simple_idct8_add_sse2; | |
| 107 | 252 | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 |
2/2✓ Branch 0 taken 1574 times.
✓ Branch 1 taken 1 times.
|
1575 | if (ARCH_X86_64 && avctx->lowres == 0) { |
| 112 |
3/4✓ Branch 0 taken 248 times.
✓ Branch 1 taken 1326 times.
✓ Branch 2 taken 248 times.
✗ Branch 3 not taken.
|
1574 | if (EXTERNAL_AVX(cpu_flags) && |
| 113 | 248 | !high_bit_depth && | |
| 114 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 245 times.
|
248 | (avctx->idct_algo == FF_IDCT_AUTO || |
| 115 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
| 116 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLEMMX || |
| 117 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
| 118 | 246 | c->idct = ff_simple_idct8_avx; | |
| 119 | 246 | c->idct_put = ff_simple_idct8_put_avx; | |
| 120 | 246 | c->idct_add = ff_simple_idct8_add_avx; | |
| 121 | 246 | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 122 | } | ||
| 123 | |||
| 124 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 1549 times.
|
1574 | if (avctx->bits_per_raw_sample == 10 && |
| 125 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 2 times.
|
25 | avctx->codec_id != AV_CODEC_ID_MPEG4 && |
| 126 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | (avctx->idct_algo == FF_IDCT_AUTO || |
| 127 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | avctx->idct_algo == FF_IDCT_SIMPLEAUTO || |
| 128 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | avctx->idct_algo == FF_IDCT_SIMPLE)) { |
| 129 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (EXTERNAL_SSE2(cpu_flags)) { |
| 130 | ✗ | c->idct_put = ff_simple_idct10_put_sse2; | |
| 131 | ✗ | c->idct_add = NULL; | |
| 132 | ✗ | c->idct = ff_simple_idct10_sse2; | |
| 133 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 134 | |||
| 135 | } | ||
| 136 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (EXTERNAL_AVX(cpu_flags)) { |
| 137 | ✗ | c->idct_put = ff_simple_idct10_put_avx; | |
| 138 | ✗ | c->idct_add = NULL; | |
| 139 | ✗ | c->idct = ff_simple_idct10_avx; | |
| 140 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1570 times.
|
1574 | if (avctx->bits_per_raw_sample == 12 && |
| 145 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | (avctx->idct_algo == FF_IDCT_AUTO || |
| 146 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { |
| 147 | ✗ | if (EXTERNAL_SSE2(cpu_flags)) { | |
| 148 | ✗ | c->idct_put = ff_simple_idct12_put_sse2; | |
| 149 | ✗ | c->idct_add = NULL; | |
| 150 | ✗ | c->idct = ff_simple_idct12_sse2; | |
| 151 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 152 | } | ||
| 153 | ✗ | if (EXTERNAL_AVX(cpu_flags)) { | |
| 154 | ✗ | c->idct_put = ff_simple_idct12_put_avx; | |
| 155 | ✗ | c->idct_add = NULL; | |
| 156 | ✗ | c->idct = ff_simple_idct12_avx; | |
| 157 | ✗ | c->perm_type = FF_IDCT_PERM_TRANSPOSE; | |
| 158 | } | ||
| 159 | } | ||
| 160 | } | ||
| 161 | 1575 | } | |
| 162 |