Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2010 David Conrad | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include "libavutil/x86/cpu.h" | ||
22 | #include "libavcodec/diracdsp.h" | ||
23 | #include "fpel.h" | ||
24 | |||
/*
 * Prototypes for the SIMD routines referenced by ff_diracdsp_init_x86().
 * None of them is defined in the visible part of this file (presumably they
 * are implemented in the x86 assembly sources -- TODO confirm against the
 * build).  The name suffix (_mmx, _sse2, _sse4) matches the
 * EXTERNAL_MMX / EXTERNAL_SSE2 / EXTERNAL_SSE4 check under which the
 * corresponding pointer is assigned in the init function.
 *
 * NOTE(review): ff_put_rect_clamped_sse2 is declared here but is not
 * referenced anywhere in the visible code.
 * NOTE(review): ff_put_signed_rect_clamped_sse2 takes a const int16_t *src
 * while ff_put_signed_rect_clamped_10_sse4 takes a const uint8_t *src; the
 * init function assigns the former through a (void *) cast and the latter
 * without one.
 */
25 | void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int); | ||
26 | |||
27 | void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); | ||
28 | |||
29 | void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); | ||
30 | void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); | ||
31 | |||
32 | void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | ||
33 | void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | ||
34 | void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); | ||
35 | |||
36 | void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); | ||
37 | |||
38 | #if HAVE_X86ASM | ||
39 | |||
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares ff_dirac_hpel_filter_v_<EXT> and ff_dirac_hpel_filter_h_<EXT>
 * and defines the static wrapper dirac_hpel_filter_<EXT>(dsth, dstv, dstc,
 * src, stride, width, height).
 *
 * The wrapper loops `height` times.  On each iteration it:
 *   1. calls the v filter with dstv and src both moved back by MMSIZE and
 *      a width of width + MMSIZE + 5;
 *   2. calls the h filter from src into dsth;
 *   3. calls the h filter from dstv into dstc;
 *   4. advances dsth, dstv, dstc and src by stride.
 *
 * NOTE(review): the widened v call presumably gives the second h pass valid
 * neighbours on both sides of the row -- confirm against the filter
 * implementation, which is not visible here.
 */
40 | #define HPEL_FILTER(MMSIZE, EXT) \ | ||
41 | void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \ | ||
42 | void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \ | ||
43 | \ | ||
44 | static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ | ||
45 | const uint8_t *src, int stride, int width, int height) \ | ||
46 | { \ | ||
47 | while( height-- ) \ | ||
48 | { \ | ||
49 | ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); /* must run first: dstv is the input of the dstc pass below */ \ | ||
50 | ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \ | ||
51 | ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); /* h filter applied to the v output written above */ \ | ||
52 | \ | ||
53 | dsth += stride; \ | ||
54 | dstv += stride; \ | ||
55 | dstc += stride; \ | ||
56 | src += stride; \ | ||
57 | } \ | ||
58 | } | ||
59 | |||
/*
 * PIXFUNC(PFX, IDX, EXT)
 *
 * Expands to two assignments that store <PFX>_dirac_pixels16_<EXT> and
 * <PFX>_dirac_pixels32_<EXT> into c-><PFX>_dirac_pixels_tab[1][IDX] and
 * [2][IDX].  The corresponding [0][IDX] (pixels8) assignment is commented
 * out.  The expansion needs a variable named `c` in scope and has no
 * trailing semicolon, so the caller supplies it.
 *
 * NOTE(review): no expansion of this macro appears in the visible code;
 * ff_diracdsp_init_x86() writes the table entries directly.
 */
60 | #define PIXFUNC(PFX, IDX, EXT) \ | ||
61 | /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = PFX ## _dirac_pixels8_ ## EXT;*/ \ | ||
62 | c->PFX ## _dirac_pixels_tab[1][IDX] = PFX ## _dirac_pixels16_ ## EXT; \ | ||
63 | c->PFX ## _dirac_pixels_tab[2][IDX] = PFX ## _dirac_pixels32_ ## EXT | ||
64 | |||
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines two static functions, <OPNAME>_dirac_pixels16_<EXT> and
 * <OPNAME>_dirac_pixels32_<EXT>, each taking (dst, src[5], stride, h).
 *
 * - If h is not a multiple of 4 (h & 3 is non-zero) the call is forwarded
 *   unchanged to ff_<OPNAME>_dirac_pixels16_c / ff_<OPNAME>_dirac_pixels32_c
 *   with the whole src array.
 * - Otherwise ff_<OPNAME>_pixels16_<EXT> is called with src[0] only.  The
 *   32-wide variant calls it twice, the second time with dst and src[0]
 *   both offset by 16.
 *
 * Only src[0] of the five source pointers is used on the non-C path.
 *
 * NOTE(review): the multiple-of-4 restriction is presumably a requirement
 * of ff_<OPNAME>_pixels16_<EXT> -- confirm against its implementation,
 * which is not visible here.
 */
65 | #define DIRAC_PIXOP(OPNAME, EXT)\ | ||
66 | static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], \ | ||
67 | int stride, int h) \ | ||
68 | {\ | ||
69 | if (h&3)\ | ||
70 | ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);\ | ||
71 | else\ | ||
72 | ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ | ||
73 | }\ | ||
74 | static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], \ | ||
75 | int stride, int h) \ | ||
76 | {\ | ||
77 | if (h&3) {\ | ||
78 | ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);\ | ||
79 | } else {\ | ||
80 | ff_ ## OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ | ||
81 | ff_ ## OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ | ||
82 | }\ | ||
83 | } | ||
84 | |||
/*
 * Macro instantiations.  These generate the static functions
 * put_dirac_pixels16_sse2, put_dirac_pixels32_sse2,
 * avg_dirac_pixels16_sse2, avg_dirac_pixels32_sse2 and
 * dirac_hpel_filter_sse2 (with MMSIZE = 16), which
 * ff_diracdsp_init_x86() installs under its EXTERNAL_SSE2 check.
 */
85 | ✗ | DIRAC_PIXOP(put, sse2) | |
86 | ✗ | DIRAC_PIXOP(avg, sse2) | |
87 | |||
88 | ✗ | HPEL_FILTER(16, sse2) | |
89 | |||
90 | #endif // HAVE_X86ASM | ||
91 | |||
/*
 * Install x86 SIMD implementations into a DiracDSPContext.
 *
 * Reads the CPU flags once via av_get_cpu_flags() and, for each feature
 * test that succeeds, overwrites the matching function pointers in *c:
 *
 *   EXTERNAL_MMX : add_dirac_obmc[0]
 *   EXTERNAL_SSE2: dirac_hpel_filter, add_rect_clamped,
 *                  put_signed_rect_clamped[0], add_dirac_obmc[1] and [2],
 *                  put_/avg_dirac_pixels_tab[1][0] and [2][0]
 *   EXTERNAL_SSE4: dequant_subband[1], put_signed_rect_clamped[1]
 *
 * Entries that are not listed above are never written by this function.
 * The three tests are independent ifs, not an else-chain.
 *
 * The whole body sits inside #if HAVE_X86ASM, so when that is false the
 * function is empty and *c is left untouched.
 *
 * @param c context whose function pointers are updated in place
 */
92 | 70 | void ff_diracdsp_init_x86(DiracDSPContext* c) | |
93 | { | ||
94 | #if HAVE_X86ASM | ||
95 | 70 | int mm_flags = av_get_cpu_flags(); | |
96 | |||
97 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (EXTERNAL_MMX(mm_flags)) { |
98 | ✗ | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; | |
99 | } | ||
100 | |||
101 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (EXTERNAL_SSE2(mm_flags)) { |
102 | ✗ | c->dirac_hpel_filter = dirac_hpel_filter_sse2; | |
103 | ✗ | c->add_rect_clamped = ff_add_rect_clamped_sse2; | |
/* NOTE(review): the (void *) cast presumably hides the int16_t-vs-uint8_t
 * src mismatch between this prototype and the slot's type -- confirm
 * against the DiracDSPContext declaration. */
104 | ✗ | c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; | |
105 | |||
106 | ✗ | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; | |
107 | ✗ | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; | |
108 | |||
109 | ✗ | c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2; | |
110 | ✗ | c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2; | |
111 | ✗ | c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2; | |
112 | ✗ | c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2; | |
113 | } | ||
114 | |||
115 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 70 times.
|
70 | if (EXTERNAL_SSE4(mm_flags)) { |
116 | ✗ | c->dequant_subband[1] = ff_dequant_subband_32_sse4; | |
117 | ✗ | c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; | |
118 | } | ||
119 | #endif | ||
120 | 70 | } | |
121 |