Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2010 David Conrad | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/x86/cpu.h" | ||
22 | #include "libavcodec/diracdsp.h" | ||
23 | #include "fpel.h" | ||
24 | |||
/*
 * Prototypes for the SIMD routines installed by ff_diracdsp_init_x86().
 * None of them is defined in this file; presumably they are implemented
 * in the external x86 assembly sources -- confirm against the build.
 */

/* Installed as DiracDSPContext.add_rect_clamped when SSE2 is usable. */
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int,
                              const int16_t *, int, int, int);

/* Fill add_dirac_obmc[0..2] (the 8 / 16 / 32 variants, in that order). */
void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/*
 * Rectangle writers.  The two SSE2 variants take an int16_t source while
 * the SSE4 "_10" variant takes a uint8_t source; the init function assigns
 * ff_put_signed_rect_clamped_sse2 through a (void *) cast.
 */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

/* Installed as dequant_subband[1] when SSE4 is usable. */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
36 | |||
37 | #if HAVE_X86ASM | ||
38 | |||
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the external row filters ff_dirac_hpel_filter_v_<EXT> and
 * ff_dirac_hpel_filter_h_<EXT>, and defines a static driver
 * dirac_hpel_filter_<EXT>() that applies them to `height` rows.
 *
 * Per row the driver:
 *   1. runs the v filter from src into dstv, with both pointers moved back
 *      by MMSIZE and a length argument of width + MMSIZE + 5;
 *   2. runs the h filter from src into dsth over `width`;
 *   3. runs the h filter from the freshly written dstv into dstc over
 *      `width`;
 * and then advances all four pointers by `stride`.
 *
 * NOTE(review): the widened v pass presumably supplies the margin that
 * step 3 reads on either side of the row -- confirm against the assembly.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                                   \
void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);         \
void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);              \
                                                                                   \
static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \
                                      const uint8_t *src, int stride,              \
                                      int width, int height)                       \
{                                                                                  \
    for (; height; height--) {                                                     \
        /* the v pass must come first: its output is the input of dstc */          \
        ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE,                    \
                                       stride, width+MMSIZE+5);                    \
        ff_dirac_hpel_filter_h_ ## EXT(dsth, src,  width);                         \
        ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                         \
                                                                                   \
        src  += stride;                                                            \
        dsth += stride;                                                            \
        dstv += stride;                                                            \
        dstc += stride;                                                            \
    }                                                                              \
}
58 | |||
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines two static wrappers, <OPNAME>_dirac_pixels16_<EXT> and
 * <OPNAME>_dirac_pixels32_<EXT>, with the (dst, src[5], stride, h)
 * signature.
 *
 * When h is not a multiple of 4 the call is forwarded unchanged to
 * ff_<OPNAME>_dirac_pixels{16,32}_c.  Otherwise only src[0] is used and the
 * work is handed to ff_<OPNAME>_pixels16_<EXT>; the 32-wide wrapper calls
 * that 16-wide routine twice, the second time 16 bytes further into both
 * dst and src[0].
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                                    \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5],  \
                                              int stride, int h)                    \
{                                                                                   \
    if (h & 3) {                                                                    \
        ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);                    \
        return;                                                                     \
    }                                                                               \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);                     \
}                                                                                   \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5],  \
                                              int stride, int h)                    \
{                                                                                   \
    if (h & 3) {                                                                    \
        ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);                    \
        return;                                                                     \
    }                                                                               \
    /* left half, then right half */                                                \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);           \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);           \
}
78 | |||
79 | ✗ | DIRAC_PIXOP(put, sse2) | |
80 | ✗ | DIRAC_PIXOP(avg, sse2) | |
81 | |||
82 | ✗ | HPEL_FILTER(16, sse2) | |
83 | |||
84 | #endif // HAVE_X86ASM | ||
85 | |||
86 | 109 | void ff_diracdsp_init_x86(DiracDSPContext* c) | |
87 | { | ||
88 | #if HAVE_X86ASM | ||
89 | 109 | int mm_flags = av_get_cpu_flags(); | |
90 | |||
91 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 82 times.
|
109 | if (EXTERNAL_SSE2(mm_flags)) { |
92 | 27 | c->dirac_hpel_filter = dirac_hpel_filter_sse2; | |
93 | 27 | c->add_rect_clamped = ff_add_rect_clamped_sse2; | |
94 | 27 | c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; | |
95 | |||
96 | 27 | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_sse2; | |
97 | 27 | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; | |
98 | 27 | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; | |
99 | |||
100 | 27 | c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2; | |
101 | 27 | c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2; | |
102 | 27 | c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2; | |
103 | 27 | c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2; | |
104 | } | ||
105 | |||
106 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 91 times.
|
109 | if (EXTERNAL_SSE4(mm_flags)) { |
107 | 18 | c->dequant_subband[1] = ff_dequant_subband_32_sse4; | |
108 | 18 | c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; | |
109 | } | ||
110 | #endif // HAVE_X86ASM | ||
111 | 109 | } | |
112 |