Line data Source code
1 : /*
2 : * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 : * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
4 : *
5 : * This file is part of FFmpeg.
6 : *
7 : * FFmpeg is free software; you can redistribute it and/or
8 : * modify it under the terms of the GNU Lesser General Public
9 : * License as published by the Free Software Foundation; either
10 : * version 2.1 of the License, or (at your option) any later version.
11 : *
12 : * FFmpeg is distributed in the hope that it will be useful,
13 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : * Lesser General Public License for more details.
16 : *
17 : * You should have received a copy of the GNU Lesser General Public
18 : * License along with FFmpeg; if not, write to the Free Software
19 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 : */
21 :
22 : #include "libavutil/common.h"
23 : #include "libavutil/intreadwrite.h"
24 :
25 : #include "bit_depth_template.c"
26 : #include "hpel_template.c"
27 : #include "pel_template.c"
28 :
/*
 * Copy h rows from src to dst, moving one AV_RN2P/AV_WN2P unit per row.
 *
 * dst and src are byte pointers and the strides are added to them unscaled,
 * so dstStride/srcStride are byte distances between consecutive rows.
 * AV_RN2P/AV_WN2P are defined outside this listing; presumably they load and
 * store 2 pixels at the current bit depth (TODO confirm).
 */
29 65134 : static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
30 : {
31 : int i;
32 521072 : for(i=0; i<h; i++)
33 : {
34 455938 : AV_WN2P(dst , AV_RN2P(src ));
35 455938 : dst+=dstStride;
36 455938 : src+=srcStride;
37 : }
38 65134 : }
39 :
/*
 * Copy h rows from src to dst, moving one AV_RN4P/AV_WN4P unit per row.
 *
 * dst and src are byte pointers and the strides are added to them unscaled,
 * so dstStride/srcStride are byte distances between consecutive rows.
 * AV_RN4P/AV_WN4P are defined outside this listing; presumably they load and
 * store 4 pixels at the current bit depth (TODO confirm).
 */
40 12685488 : static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
41 : {
42 : int i;
43 126854880 : for(i=0; i<h; i++)
44 : {
45 114169392 : AV_WN4P(dst , AV_RN4P(src ));
46 114169392 : dst+=dstStride;
47 114169392 : src+=srcStride;
48 : }
49 12685488 : }
50 :
/*
 * Copy h rows from src to dst, two AV_RN4P/AV_WN4P units per row.
 *
 * The second unit sits 4*sizeof(pixel) bytes after the first, i.e. the
 * pixel offset is converted to a byte offset because dst/src are byte
 * pointers. dstStride/srcStride are likewise byte distances between rows.
 * AV_RN4P/AV_WN4P are defined outside this listing; presumably each moves
 * 4 pixels, giving 8 pixels per row (TODO confirm).
 */
51 6640530 : static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
52 : {
53 : int i;
54 92967420 : for(i=0; i<h; i++)
55 : {
56 86326890 : AV_WN4P(dst , AV_RN4P(src ));
57 86326890 : AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
58 86326890 : dst+=dstStride;
59 86326890 : src+=srcStride;
60 : }
61 6640530 : }
62 :
/*
 * Copy h rows from src to dst, four AV_RN4P/AV_WN4P units per row.
 *
 * The units start at pixel offsets 0, 4, 8 and 12, each scaled by
 * sizeof(pixel) to obtain a byte offset because dst/src are byte pointers.
 * dstStride/srcStride are byte distances between consecutive rows.
 * AV_RN4P/AV_WN4P are defined outside this listing; presumably each moves
 * 4 pixels, giving 16 pixels per row (TODO confirm).
 */
63 3218159 : static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
64 : {
65 : int i;
66 70799498 : for(i=0; i<h; i++)
67 : {
68 67581339 : AV_WN4P(dst , AV_RN4P(src ));
69 67581339 : AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
70 67581339 : AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
71 67581339 : AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
72 67581339 : dst+=dstStride;
73 67581339 : src+=srcStride;
74 : }
75 3218159 : }
76 :
/*
 * H264_LOWPASS(OPNAME, OP, OP2): expands to the static 6-tap low-pass filter
 * helpers FUNC(<OPNAME>h264_qpel{2,4,8,16}_{h,v,hv}_lowpass).
 *
 * Every output sample is built from six neighbouring samples with the
 * weights (1, -5, 20, 20, -5, 1):
 *     (s[0]+s[1])*20 - (s[-1]+s[2])*5 + (s[-2]+s[3])
 * The unscaled sum is handed, together with the destination lvalue, to OP
 * (h and v variants) or OP2 (hv variants); scaling, clipping and the actual
 * store are entirely up to those macros (see op_put/op_avg/op2_put/op2_avg
 * further down in this file).
 *
 * Conventions shared by the generated functions:
 *  - dst/src arrive as byte pointers and are cast to pixel pointers; the
 *    byte strides become element strides via ">>= sizeof(pixel)-1" (a no-op
 *    for 1-byte pixels, a halving for 2-byte pixels).
 *  - For an N x N block, *_h_lowpass filters along each row and reads
 *    src[-2] .. src[N+2]; *_v_lowpass filters down each column and reads
 *    rows -2 .. N+2. The caller has to make that margin readable.
 *  - *_hv_lowpass first runs the horizontal filter over N+5 rows (beginning
 *    two rows above src) into tmp, then runs the vertical filter over tmp.
 *    tmp and tmpStride are in pixeltmp elements (tmpStride is not rescaled).
 *    "pad" is added to every intermediate and subtracted again before the
 *    second pass; it is non-zero only when BIT_DEPTH == 10 -- presumably to
 *    keep the intermediates inside the range of pixeltmp (NOTE(review):
 *    confirm against the definition of pixeltmp).
 *  - The 16-wide variants are composed of four 8-wide calls: left and right
 *    half, then the same again 8 rows further down. The hv variant reuses
 *    the same tmp area for the upper and the lower half.
 *  - INIT_CLIP is defined outside this listing; presumably it prepares
 *    whatever CLIP (used by OP/OP2) needs.
 */
77 : #define H264_LOWPASS(OPNAME, OP, OP2) \
78 : static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const uint8_t *p_src, int dstStride, int srcStride){\
79 : const int h=2;\
80 : INIT_CLIP\
81 : int i;\
82 : pixel *dst = (pixel*)p_dst;\
83 : const pixel *src = (const pixel*)p_src;\
84 : dstStride >>= sizeof(pixel)-1; /* byte stride -> pixel stride */\
85 : srcStride >>= sizeof(pixel)-1;\
86 : for(i=0; i<h; i++)\
87 : {\
88 : OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
89 : OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
90 : dst+=dstStride;\
91 : src+=srcStride;\
92 : }\
93 : }\
94 : \
95 : static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
96 : const int w=2;\
97 : INIT_CLIP\
98 : int i;\
99 : pixel *dst = (pixel*)_dst;\
100 : const pixel *src = (const pixel*)_src;\
101 : dstStride >>= sizeof(pixel)-1;\
102 : srcStride >>= sizeof(pixel)-1;\
103 : for(i=0; i<w; i++) /* one column per iteration */\
104 : {\
105 : const int srcB= src[-2*srcStride];\
106 : const int srcA= src[-1*srcStride];\
107 : const int src0= src[0 *srcStride];\
108 : const int src1= src[1 *srcStride];\
109 : const int src2= src[2 *srcStride];\
110 : const int src3= src[3 *srcStride];\
111 : const int src4= src[4 *srcStride];\
112 : OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
113 : OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
114 : dst++;\
115 : src++;\
116 : }\
117 : }\
118 : \
119 : static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
120 : const int h=2;\
121 : const int w=2;\
122 : const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0; /* offset on tmp values, 10-bit only */\
123 : INIT_CLIP\
124 : int i;\
125 : pixel *dst = (pixel*)_dst;\
126 : const pixel *src = (const pixel*)_src;\
127 : dstStride >>= sizeof(pixel)-1;\
128 : srcStride >>= sizeof(pixel)-1;\
129 : src -= 2*srcStride; /* first pass starts two rows above the block */\
130 : for(i=0; i<h+5; i++)\
131 : {\
132 : tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
133 : tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
134 : tmp+=tmpStride;\
135 : src+=srcStride;\
136 : }\
137 : tmp -= tmpStride*(h+5-2); /* rewind to the tmp row that matches block row 0 */\
138 : for(i=0; i<w; i++)\
139 : {\
140 : const int tmpB= tmp[-2*tmpStride] - pad;\
141 : const int tmpA= tmp[-1*tmpStride] - pad;\
142 : const int tmp0= tmp[0 *tmpStride] - pad;\
143 : const int tmp1= tmp[1 *tmpStride] - pad;\
144 : const int tmp2= tmp[2 *tmpStride] - pad;\
145 : const int tmp3= tmp[3 *tmpStride] - pad;\
146 : const int tmp4= tmp[4 *tmpStride] - pad;\
147 : OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
148 : OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
149 : dst++;\
150 : tmp++;\
151 : }\
152 : }\
153 : static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
154 : const int h=4;\
155 : INIT_CLIP\
156 : int i;\
157 : pixel *dst = (pixel*)_dst;\
158 : const pixel *src = (const pixel*)_src;\
159 : dstStride >>= sizeof(pixel)-1;\
160 : srcStride >>= sizeof(pixel)-1;\
161 : for(i=0; i<h; i++)\
162 : {\
163 : OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
164 : OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
165 : OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
166 : OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
167 : dst+=dstStride;\
168 : src+=srcStride;\
169 : }\
170 : }\
171 : \
172 : static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
173 : const int w=4;\
174 : INIT_CLIP\
175 : int i;\
176 : pixel *dst = (pixel*)_dst;\
177 : const pixel *src = (const pixel*)_src;\
178 : dstStride >>= sizeof(pixel)-1;\
179 : srcStride >>= sizeof(pixel)-1;\
180 : for(i=0; i<w; i++)\
181 : {\
182 : const int srcB= src[-2*srcStride];\
183 : const int srcA= src[-1*srcStride];\
184 : const int src0= src[0 *srcStride];\
185 : const int src1= src[1 *srcStride];\
186 : const int src2= src[2 *srcStride];\
187 : const int src3= src[3 *srcStride];\
188 : const int src4= src[4 *srcStride];\
189 : const int src5= src[5 *srcStride];\
190 : const int src6= src[6 *srcStride];\
191 : OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
192 : OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
193 : OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
194 : OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
195 : dst++;\
196 : src++;\
197 : }\
198 : }\
199 : \
200 : static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
201 : const int h=4;\
202 : const int w=4;\
203 : const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
204 : INIT_CLIP\
205 : int i;\
206 : pixel *dst = (pixel*)_dst;\
207 : const pixel *src = (const pixel*)_src;\
208 : dstStride >>= sizeof(pixel)-1;\
209 : srcStride >>= sizeof(pixel)-1;\
210 : src -= 2*srcStride;\
211 : for(i=0; i<h+5; i++)\
212 : {\
213 : tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\
214 : tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\
215 : tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\
216 : tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\
217 : tmp+=tmpStride;\
218 : src+=srcStride;\
219 : }\
220 : tmp -= tmpStride*(h+5-2);\
221 : for(i=0; i<w; i++)\
222 : {\
223 : const int tmpB= tmp[-2*tmpStride] - pad;\
224 : const int tmpA= tmp[-1*tmpStride] - pad;\
225 : const int tmp0= tmp[0 *tmpStride] - pad;\
226 : const int tmp1= tmp[1 *tmpStride] - pad;\
227 : const int tmp2= tmp[2 *tmpStride] - pad;\
228 : const int tmp3= tmp[3 *tmpStride] - pad;\
229 : const int tmp4= tmp[4 *tmpStride] - pad;\
230 : const int tmp5= tmp[5 *tmpStride] - pad;\
231 : const int tmp6= tmp[6 *tmpStride] - pad;\
232 : OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
233 : OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
234 : OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
235 : OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
236 : dst++;\
237 : tmp++;\
238 : }\
239 : }\
240 : \
241 : static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
242 : const int h=8;\
243 : INIT_CLIP\
244 : int i;\
245 : pixel *dst = (pixel*)_dst;\
246 : const pixel *src = (const pixel*)_src;\
247 : dstStride >>= sizeof(pixel)-1;\
248 : srcStride >>= sizeof(pixel)-1;\
249 : for(i=0; i<h; i++)\
250 : {\
251 : OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
252 : OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
253 : OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
254 : OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
255 : OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
256 : OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
257 : OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
258 : OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
259 : dst+=dstStride;\
260 : src+=srcStride;\
261 : }\
262 : }\
263 : \
264 : static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
265 : const int w=8;\
266 : INIT_CLIP\
267 : int i;\
268 : pixel *dst = (pixel*)_dst;\
269 : const pixel *src = (const pixel*)_src;\
270 : dstStride >>= sizeof(pixel)-1;\
271 : srcStride >>= sizeof(pixel)-1;\
272 : for(i=0; i<w; i++)\
273 : {\
274 : const int srcB= src[-2*srcStride];\
275 : const int srcA= src[-1*srcStride];\
276 : const int src0= src[0 *srcStride];\
277 : const int src1= src[1 *srcStride];\
278 : const int src2= src[2 *srcStride];\
279 : const int src3= src[3 *srcStride];\
280 : const int src4= src[4 *srcStride];\
281 : const int src5= src[5 *srcStride];\
282 : const int src6= src[6 *srcStride];\
283 : const int src7= src[7 *srcStride];\
284 : const int src8= src[8 *srcStride];\
285 : const int src9= src[9 *srcStride];\
286 : const int src10=src[10*srcStride];\
287 : OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
288 : OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
289 : OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
290 : OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
291 : OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
292 : OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
293 : OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
294 : OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
295 : dst++;\
296 : src++;\
297 : }\
298 : }\
299 : \
300 : static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
301 : const int h=8;\
302 : const int w=8;\
303 : const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
304 : INIT_CLIP\
305 : int i;\
306 : pixel *dst = (pixel*)_dst;\
307 : const pixel *src = (const pixel*)_src;\
308 : dstStride >>= sizeof(pixel)-1;\
309 : srcStride >>= sizeof(pixel)-1;\
310 : src -= 2*srcStride;\
311 : for(i=0; i<h+5; i++)\
312 : {\
313 : tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\
314 : tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\
315 : tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\
316 : tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\
317 : tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\
318 : tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\
319 : tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\
320 : tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\
321 : tmp+=tmpStride;\
322 : src+=srcStride;\
323 : }\
324 : tmp -= tmpStride*(h+5-2);\
325 : for(i=0; i<w; i++)\
326 : {\
327 : const int tmpB= tmp[-2*tmpStride] - pad;\
328 : const int tmpA= tmp[-1*tmpStride] - pad;\
329 : const int tmp0= tmp[0 *tmpStride] - pad;\
330 : const int tmp1= tmp[1 *tmpStride] - pad;\
331 : const int tmp2= tmp[2 *tmpStride] - pad;\
332 : const int tmp3= tmp[3 *tmpStride] - pad;\
333 : const int tmp4= tmp[4 *tmpStride] - pad;\
334 : const int tmp5= tmp[5 *tmpStride] - pad;\
335 : const int tmp6= tmp[6 *tmpStride] - pad;\
336 : const int tmp7= tmp[7 *tmpStride] - pad;\
337 : const int tmp8= tmp[8 *tmpStride] - pad;\
338 : const int tmp9= tmp[9 *tmpStride] - pad;\
339 : const int tmp10=tmp[10*tmpStride] - pad;\
340 : OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
341 : OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
342 : OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
343 : OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
344 : OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
345 : OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
346 : OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
347 : OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
348 : dst++;\
349 : tmp++;\
350 : }\
351 : }\
352 : \
353 : static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
354 : FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
355 : FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
356 : src += 8*srcStride; /* byte pointers and byte strides here: 8 rows down */\
357 : dst += 8*dstStride;\
358 : FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
359 : FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
360 : }\
361 : \
362 : static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
363 : FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
364 : FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
365 : src += 8*srcStride;\
366 : dst += 8*dstStride;\
367 : FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
368 : FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
369 : }\
370 : \
371 : static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
372 : FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
373 : FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
374 : src += 8*srcStride;\
375 : dst += 8*dstStride;\
376 : FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
377 : FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
378 : }\
379 :
/*
 * H264_MC(OPNAME, SIZE): expands to the sixteen entry points
 * FUNCC(<OPNAME>h264_qpel<SIZE>_mcXY)(dst, src, stride) with X, Y in 0..3.
 *
 * Judging by which filters each variant uses (mc20 is h_lowpass only, mc02
 * is v_lowpass only), X selects the horizontal and Y the vertical
 * quarter-sample position.
 *
 *   mc00              FUNCC(<OPNAME>pixels<SIZE>) straight from src
 *   mc20, mc02, mc22  <OPNAME> h_, v_ and hv_lowpass written straight to dst
 *   everything else   two SIZE x SIZE sources are prepared and passed to
 *                     FUNC(<OPNAME>pixels<SIZE>_l2), which is defined outside
 *                     this listing (presumably it averages its two sources
 *                     into dst -- TODO confirm):
 *       mc10  src                    + halfH(src)
 *       mc30  src one pixel right    + halfH(src)
 *       mc01  full_mid               + halfV(src)
 *       mc03  full_mid one row down  + halfV(src)
 *       mc11  halfH(src)             + halfV(src)
 *       mc31  halfH(src)             + halfV(src one pixel right)
 *       mc13  halfH(src one row down)+ halfV(src)
 *       mc33  halfH(src one row down)+ halfV(src one pixel right)
 *       mc21  halfH(src)             + halfHV(src)
 *       mc23  halfH(src one row down)+ halfHV(src)
 *       mc12  halfV(src)             + halfHV(src)
 *       mc32  halfV(src one pixel right) + halfHV(src)
 *
 * The intermediate planes are always produced with the put_ lowpass
 * helpers, so OPNAME only affects the final write to dst.
 *
 * Scratch buffers (all on the stack):
 *  - half / halfH / halfV / halfHV: SIZE x SIZE pixels, row pitch
 *    SIZE*sizeof(pixel) bytes.
 *  - full: SIZE+5 rows copied with copy_block<SIZE> starting two rows above
 *    src, packed with the same row pitch; full_mid points at its third row,
 *    i.e. the copy of the row src points to. The vertical filter then runs
 *    on this packed copy.
 *  - tmp: pixeltmp array of SIZE*(SIZE+5)*sizeof(pixel) elements, handed to
 *    hv_lowpass with a row pitch of SIZE*sizeof(pixel) elements.
 *
 * NOTE(review): stride is ptrdiff_t here but the lowpass and copy_block
 * helpers take int strides, so it is implicitly converted at each call.
 */
380 : #define H264_MC(OPNAME, SIZE) \
381 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
382 : {\
383 : FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
384 : }\
385 : \
386 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
387 : {\
388 : uint8_t half[SIZE*SIZE*sizeof(pixel)];\
389 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
390 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
391 : }\
392 : \
393 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
394 : {\
395 : FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
396 : }\
397 : \
398 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
399 : {\
400 : uint8_t half[SIZE*SIZE*sizeof(pixel)];\
401 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
402 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
403 : }\
404 : \
405 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
406 : {\
407 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
408 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel); /* third row of full: the copy of the row at src */\
409 : uint8_t half[SIZE*SIZE*sizeof(pixel)];\
410 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
411 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
412 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
413 : }\
414 : \
415 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
416 : {\
417 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
418 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
419 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
420 : FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
421 : }\
422 : \
423 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
424 : {\
425 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
426 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
427 : uint8_t half[SIZE*SIZE*sizeof(pixel)];\
428 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
429 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
430 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
431 : }\
432 : \
433 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
434 : {\
435 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
436 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
437 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
438 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
439 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
440 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
441 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
442 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
443 : }\
444 : \
445 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
446 : {\
447 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
448 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
449 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
450 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
451 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
452 : FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
453 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
454 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
455 : }\
456 : \
457 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
458 : {\
459 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
460 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
461 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
462 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
463 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
464 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
465 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
466 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
467 : }\
468 : \
469 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
470 : {\
471 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
472 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
473 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
474 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
475 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
476 : FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
477 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
478 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
479 : }\
480 : \
481 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
482 : {\
483 : pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
484 : FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
485 : }\
486 : \
487 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
488 : {\
489 : pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
490 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
491 : uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
492 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
493 : FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
494 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
495 : }\
496 : \
497 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
498 : {\
499 : pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
500 : uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
501 : uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
502 : FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
503 : FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
504 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
505 : }\
506 : \
507 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
508 : {\
509 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
510 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
511 : pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
512 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
513 : uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
514 : FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\
515 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
516 : FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
517 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
518 : }\
519 : \
520 : static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
521 : {\
522 : uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
523 : uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
524 : pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
525 : uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
526 : uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
527 : FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\
528 : FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\
529 : FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\
530 : FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
531 : }\
532 :
/*
 * Store operators plugged into H264_LOWPASS as OP (single filter pass) and
 * OP2 (horizontal followed by vertical pass).
 *
 * One pass of the (1, -5, 20, 20, -5, 1) filter has a gain of 32, so op_*
 * adds 16 and shifts right by 5; two cascaded passes have a gain of 1024,
 * so op2_* adds 512 and shifts right by 10. The result goes through CLIP
 * (defined outside this listing) before being stored.
 * The *_put forms overwrite a; the *_avg forms replace a with the average
 * of its current value and the clipped result, rounding halves up.
 */
533 : #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1)
534 : //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
535 : #define op_put(a, b) a = CLIP(((b) + 16)>>5)
536 : #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
537 : #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
538 :
/*
 * Instantiate the low-pass helpers for both put_ and avg_, then the mcXY
 * entry points. There is no H264_MC(avg_, 2): for SIZE 2 only the put_
 * entry points are generated (presumably why the qpel2 helpers are marked
 * av_unused).
 */
539 91679060 : H264_LOWPASS(put_ , op_put, op2_put)
540 1249857 : H264_LOWPASS(avg_ , op_avg, op2_avg)
541 194704 : H264_MC(put_, 2)
542 14595824 : H264_MC(put_, 4)
543 13040851 : H264_MC(put_, 8)
544 9718314 : H264_MC(put_, 16)
545 3944032 : H264_MC(avg_, 4)
546 1185443 : H264_MC(avg_, 8)
547 1792561 : H264_MC(avg_, 16)
548 :
/* The store operators are local to this template; drop them again. */
549 : #undef op_avg
550 : #undef op_put
551 : #undef op2_avg
552 : #undef op2_put
|