/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18 |
|
|
|
19 |
|
|
#include <stdint.h> |
20 |
|
|
|
21 |
|
|
#include "config.h" |
22 |
|
|
#include "libavutil/attributes.h" |
23 |
|
|
#include "libavutil/cpu.h" |
24 |
|
|
#include "libavutil/x86/cpu.h" |
25 |
|
|
#include "libavcodec/h264chroma.h" |
26 |
|
|
|
27 |
|
|
/*
 * Prototypes of the 8-bit chroma routines installed by
 * ff_h264chroma_init_x86().  None of them is defined in this file; they
 * all share one signature: (dst, src, stride, h, x, y).
 */

/* mc8 variants (slot 0 of the put/avg tables). */
void ff_put_h264_chroma_mc8_rnd_mmx(uint8_t *dst, uint8_t *src,
                                    ptrdiff_t stride,
                                    int h, int x, int y);
void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
                                       ptrdiff_t stride,
                                       int h, int x, int y);
void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
                                      ptrdiff_t stride,
                                      int h, int x, int y);

/* mc4 variants (slot 1 of the put/avg tables). */
void ff_put_h264_chroma_mc4_mmx(uint8_t *dst, uint8_t *src,
                                ptrdiff_t stride,
                                int h, int x, int y);
void ff_avg_h264_chroma_mc4_mmxext(uint8_t *dst, uint8_t *src,
                                   ptrdiff_t stride,
                                   int h, int x, int y);
void ff_avg_h264_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
                                  ptrdiff_t stride,
                                  int h, int x, int y);

/* mc2 variants (slot 2 of the put/avg tables). */
void ff_put_h264_chroma_mc2_mmxext(uint8_t *dst, uint8_t *src,
                                   ptrdiff_t stride,
                                   int h, int x, int y);
void ff_avg_h264_chroma_mc2_mmxext(uint8_t *dst, uint8_t *src,
                                   ptrdiff_t stride,
                                   int h, int x, int y);

/* SSSE3 "put" variants. */
void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
                                      ptrdiff_t stride,
                                      int h, int x, int y);
void ff_put_h264_chroma_mc4_ssse3(uint8_t *dst, uint8_t *src,
                                  ptrdiff_t stride,
                                  int h, int x, int y);

/* SSSE3 "avg" variants. */
void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
                                      ptrdiff_t stride,
                                      int h, int x, int y);
void ff_avg_h264_chroma_mc4_ssse3(uint8_t *dst, uint8_t *src,
                                  ptrdiff_t stride,
                                  int h, int x, int y);
55 |
|
|
|
56 |
|
|
/*
 * Declare one chroma routine whose name is assembled from its pieces:
 *   ff_<OP>_h264_chroma_mc<NUM>_<DEPTH>_<OPT>
 * The expansion already ends with ';', so invocations carry none.
 */
#define CHROMA_MC(OP, NUM, DEPTH, OPT)                                  \
void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT      \
    (uint8_t *dst, uint8_t *src,                                        \
     ptrdiff_t stride, int h, int x, int y);

/* 10-bit mc2 / mc4, MMXEXT. */
CHROMA_MC(put, 2, 10, mmxext)
CHROMA_MC(avg, 2, 10, mmxext)
CHROMA_MC(put, 4, 10, mmxext)
CHROMA_MC(avg, 4, 10, mmxext)

/* 10-bit mc8, SSE2. */
CHROMA_MC(put, 8, 10, sse2)
CHROMA_MC(avg, 8, 10, sse2)

/* 10-bit mc8, AVX. */
CHROMA_MC(put, 8, 10, avx)
CHROMA_MC(avg, 8, 10, avx)
69 |
|
|
|
70 |
|
1334 |
/**
 * Fill the put/avg chroma function tables of @p c with the x86 routines
 * matching the CPU flags reported by av_get_cpu_flags().
 *
 * Table slot 0 receives the mc8 routines, slot 1 the mc4 routines and
 * slot 2 the mc2 routines.  Within one bit-depth branch the checks run
 * from the oldest to the newest instruction set, and a later check
 * overwrites whatever an earlier one stored in the same slot.
 *
 * @param c         context whose put_h264_chroma_pixels_tab and
 *                  avg_h264_chroma_pixels_tab entries are written
 * @param bit_depth sample bit depth; values up to 8 select the 8-bit
 *                  routines, 9 and 10 select the "_10_" routines, and
 *                  anything larger leaves @p c untouched
 */
av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
{
    const int flags = av_get_cpu_flags();

    if (bit_depth <= 8) {
        /* 8-bit routines. */
        if (EXTERNAL_MMX(flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
        }

        if (EXTERNAL_AMD3DNOW(flags)) {
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
        }

        if (EXTERNAL_MMXEXT(flags)) {
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
            c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
            c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
        }

        if (EXTERNAL_SSSE3(flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
        }
    } else if (bit_depth <= 10) {
        /* 9- and 10-bit routines. */
        if (EXTERNAL_MMXEXT(flags)) {
            c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
            c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
            c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
            c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
        }

        if (EXTERNAL_SSE2(flags)) {
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
        }

        if (EXTERNAL_AVX(flags)) {
            /* AVX implies !cache64. */
            /* TODO: Port cache(32|64) detection from x264. */
            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
        }
    }
}