Line data Source code
1 : /*
2 : * HEVC video decoder
3 : *
4 : * Copyright (C) 2012 - 2013 Guillaume Martres
5 : *
6 : * This file is part of FFmpeg.
7 : *
8 : * FFmpeg is free software; you can redistribute it and/or
9 : * modify it under the terms of the GNU Lesser General Public
10 : * License as published by the Free Software Foundation; either
11 : * version 2.1 of the License, or (at your option) any later version.
12 : *
13 : * FFmpeg is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : * Lesser General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU Lesser General Public
19 : * License along with FFmpeg; if not, write to the Free Software
20 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 : */
22 :
23 : #include "libavutil/pixdesc.h"
24 :
25 : #include "bit_depth_template.c"
26 : #include "hevcpred.h"
27 :
28 : #define POS(x, y) src[(x) + stride * (y)]
29 :
30 19926145 : static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
31 : int log2_size, int c_idx)
32 : {
33 : #define PU(x) \
34 : ((x) >> s->ps.sps->log2_min_pu_size)
35 : #define MVF(x, y) \
36 : (s->ref->tab_mvf[(x) + (y) * min_pu_width])
37 : #define MVF_PU(x, y) \
38 : MVF(PU(x0 + ((x) * (1 << hshift))), PU(y0 + ((y) * (1 << vshift))))
39 : #define IS_INTRA(x, y) \
40 : (MVF_PU(x, y).pred_flag == PF_INTRA)
41 : #define MIN_TB_ADDR_ZS(x, y) \
42 : s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
43 : #define EXTEND(ptr, val, len) \
44 : do { \
45 : pixel4 pix = PIXEL_SPLAT_X4(val); \
46 : for (i = 0; i < (len); i += 4) \
47 : AV_WN4P(ptr + i, pix); \
48 : } while (0)
49 :
50 : #define EXTEND_RIGHT_CIP(ptr, start, length) \
51 : for (i = start; i < (start) + (length); i += 4) \
52 : if (!IS_INTRA(i, -1)) \
53 : AV_WN4P(&ptr[i], a); \
54 : else \
55 : a = PIXEL_SPLAT_X4(ptr[i+3])
56 : #define EXTEND_LEFT_CIP(ptr, start, length) \
57 : for (i = start; i > (start) - (length); i--) \
58 : if (!IS_INTRA(i - 1, -1)) \
59 : ptr[i - 1] = ptr[i]
60 : #define EXTEND_UP_CIP(ptr, start, length) \
61 : for (i = (start); i > (start) - (length); i -= 4) \
62 : if (!IS_INTRA(-1, i - 3)) \
63 : AV_WN4P(&ptr[i - 3], a); \
64 : else \
65 : a = PIXEL_SPLAT_X4(ptr[i - 3])
66 : #define EXTEND_DOWN_CIP(ptr, start, length) \
67 : for (i = start; i < (start) + (length); i += 4) \
68 : if (!IS_INTRA(-1, i)) \
69 : AV_WN4P(&ptr[i], a); \
70 : else \
71 : a = PIXEL_SPLAT_X4(ptr[i + 3])
72 :
73 19926145 : HEVCLocalContext *lc = s->HEVClc;
74 : int i;
75 19926145 : int hshift = s->ps.sps->hshift[c_idx];
76 19926145 : int vshift = s->ps.sps->vshift[c_idx];
77 19926145 : int size = (1 << log2_size);
78 19926145 : int size_in_luma_h = size << hshift;
79 19926145 : int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
80 19926145 : int size_in_luma_v = size << vshift;
81 19926145 : int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
82 19926145 : int x = x0 >> hshift;
83 19926145 : int y = y0 >> vshift;
84 19926145 : int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
85 19926145 : int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
86 :
87 19926145 : int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
88 :
89 19926145 : ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
90 19926145 : pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;
91 :
92 19926145 : int min_pu_width = s->ps.sps->min_pu_width;
93 :
94 29288054 : enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
95 9361909 : lc->tu.intra_pred_mode;
96 : pixel4 a;
97 : pixel left_array[2 * MAX_TB_SIZE + 1];
98 : pixel filtered_left_array[2 * MAX_TB_SIZE + 1];
99 : pixel top_array[2 * MAX_TB_SIZE + 1];
100 : pixel filtered_top_array[2 * MAX_TB_SIZE + 1];
101 :
102 19926145 : pixel *left = left_array + 1;
103 19926145 : pixel *top = top_array + 1;
104 19926145 : pixel *filtered_left = filtered_left_array + 1;
105 19926145 : pixel *filtered_top = filtered_top_array + 1;
106 19926145 : int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->ps.sps->tb_mask);
107 19926145 : int cand_left = lc->na.cand_left;
108 19926145 : int cand_up_left = lc->na.cand_up_left;
109 19926145 : int cand_up = lc->na.cand_up;
110 19926145 : int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask, y_tb - 1);
111 :
112 39852290 : int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->ps.sps->height) -
113 19926145 : (y0 + size_in_luma_v)) >> vshift;
114 39852290 : int top_right_size = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) -
115 19926145 : (x0 + size_in_luma_h)) >> hshift;
116 :
117 19926145 : if (s->ps.pps->constrained_intra_pred_flag == 1) {
118 135525 : int size_in_luma_pu_v = PU(size_in_luma_v);
119 135525 : int size_in_luma_pu_h = PU(size_in_luma_h);
120 135525 : int on_pu_edge_x = !av_mod_uintp2(x0, s->ps.sps->log2_min_pu_size);
121 135525 : int on_pu_edge_y = !av_mod_uintp2(y0, s->ps.sps->log2_min_pu_size);
122 135525 : if (!size_in_luma_pu_h)
123 0 : size_in_luma_pu_h++;
124 135525 : if (cand_bottom_left == 1 && on_pu_edge_x) {
125 42080 : int x_left_pu = PU(x0 - 1);
126 42080 : int y_bottom_pu = PU(y0 + size_in_luma_v);
127 42080 : int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_bottom_pu);
128 42080 : cand_bottom_left = 0;
129 89902 : for (i = 0; i < max; i += 2)
130 47822 : cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
131 : }
132 135525 : if (cand_left == 1 && on_pu_edge_x) {
133 133107 : int x_left_pu = PU(x0 - 1);
134 133107 : int y_left_pu = PU(y0);
135 133107 : int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_left_pu);
136 133107 : cand_left = 0;
137 287436 : for (i = 0; i < max; i += 2)
138 154329 : cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
139 : }
140 135525 : if (cand_up_left == 1) {
141 129084 : int x_left_pu = PU(x0 - 1);
142 129084 : int y_top_pu = PU(y0 - 1);
143 129084 : cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
144 : }
145 135525 : if (cand_up == 1 && on_pu_edge_y) {
146 131403 : int x_top_pu = PU(x0);
147 131403 : int y_top_pu = PU(y0 - 1);
148 131403 : int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_top_pu);
149 131403 : cand_up = 0;
150 282852 : for (i = 0; i < max; i += 2)
151 151449 : cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
152 : }
153 135525 : if (cand_up_right == 1 && on_pu_edge_y) {
154 87193 : int y_top_pu = PU(y0 - 1);
155 87193 : int x_right_pu = PU(x0 + size_in_luma_h);
156 87193 : int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_right_pu);
157 87193 : cand_up_right = 0;
158 187988 : for (i = 0; i < max; i += 2)
159 100795 : cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
160 : }
161 135525 : memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel));
162 135525 : memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel));
163 135525 : top[-1] = 128;
164 : }
165 19926145 : if (cand_up_left) {
166 19333070 : left[-1] = POS(-1, -1);
167 19333070 : top[-1] = left[-1];
168 : }
169 19926145 : if (cand_up)
170 19558552 : memcpy(top, src - stride, size * sizeof(pixel));
171 19926145 : if (cand_up_right) {
172 12764383 : memcpy(top + size, src - stride + size, size * sizeof(pixel));
173 12764383 : EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
174 : size - top_right_size);
175 : }
176 19926145 : if (cand_left)
177 137585860 : for (i = 0; i < size; i++)
178 117897080 : left[i] = POS(-1, i);
179 19926145 : if (cand_bottom_left) {
180 44100208 : for (i = size; i < size + bottom_left_size; i++)
181 37556532 : left[i] = POS(-1, i);
182 6543676 : EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
183 : size - bottom_left_size);
184 : }
185 :
186 19926145 : if (s->ps.pps->constrained_intra_pred_flag == 1) {
187 135525 : if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
188 267702 : int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
189 133851 : 2 * size : (s->ps.sps->width - x0) >> hshift;
190 267702 : int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
191 133851 : 2 * size : (s->ps.sps->height - y0) >> vshift;
192 133851 : int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
193 133851 : if (!cand_up_right) {
194 104394 : size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
195 52197 : size : (s->ps.sps->width - x0) >> hshift;
196 : }
197 133851 : if (!cand_bottom_left) {
198 189324 : size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
199 94662 : size : (s->ps.sps->height - y0) >> vshift;
200 : }
201 133851 : if (cand_bottom_left || cand_left || cand_up_left) {
202 264764 : while (j > -1 && !IS_INTRA(-1, j))
203 8640 : j--;
204 256124 : if (!IS_INTRA(-1, j)) {
205 0 : j = 0;
206 0 : while (j < size_max_x && !IS_INTRA(j, -1))
207 0 : j++;
208 0 : EXTEND_LEFT_CIP(top, j, j + 1);
209 0 : left[-1] = top[-1];
210 : }
211 : } else {
212 5789 : j = 0;
213 15218 : while (j < size_max_x && !IS_INTRA(j, -1))
214 3640 : j++;
215 5789 : if (j > 0)
216 409 : if (x0 > 0) {
217 380 : EXTEND_LEFT_CIP(top, j, j + 1);
218 : } else {
219 29 : EXTEND_LEFT_CIP(top, j, j);
220 29 : top[-1] = top[0];
221 : }
222 5789 : left[-1] = top[-1];
223 : }
224 133851 : left[-1] = top[-1];
225 133851 : if (cand_bottom_left || cand_left) {
226 126898 : a = PIXEL_SPLAT_X4(left[-1]);
227 126898 : EXTEND_DOWN_CIP(left, 0, size_max_y);
228 : }
229 133851 : if (!cand_left)
230 7301 : EXTEND(left, left[-1], size);
231 133851 : if (!cand_bottom_left)
232 94662 : EXTEND(left + size, left[size - 1], size);
233 133851 : if (x0 != 0 && y0 != 0) {
234 130185 : a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
235 130185 : EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
236 260370 : if (!IS_INTRA(-1, - 1))
237 7426 : left[-1] = left[0];
238 3666 : } else if (x0 == 0) {
239 1552 : EXTEND(left, 0, size_max_y);
240 : } else {
241 2114 : a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
242 2114 : EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
243 : }
244 133851 : top[-1] = left[-1];
245 133851 : if (y0 != 0) {
246 131737 : a = PIXEL_SPLAT_X4(left[-1]);
247 131737 : EXTEND_RIGHT_CIP(top, 0, size_max_x);
248 : }
249 : }
250 : }
251 : // Infer the unavailable samples
252 19926145 : if (!cand_bottom_left) {
253 13382469 : if (cand_left) {
254 13145452 : EXTEND(left + size, left[size - 1], size);
255 237017 : } else if (cand_up_left) {
256 1164 : EXTEND(left, left[-1], 2 * size);
257 1164 : cand_left = 1;
258 235853 : } else if (cand_up) {
259 225484 : left[-1] = top[0];
260 225484 : EXTEND(left, left[-1], 2 * size);
261 225484 : cand_up_left = 1;
262 225484 : cand_left = 1;
263 10369 : } else if (cand_up_right) {
264 340 : EXTEND(top, top[size], size);
265 340 : left[-1] = top[size];
266 340 : EXTEND(left, left[-1], 2 * size);
267 340 : cand_up = 1;
268 340 : cand_up_left = 1;
269 340 : cand_left = 1;
270 : } else { // No samples available
271 10029 : left[-1] = (1 << (BIT_DEPTH - 1));
272 10029 : EXTEND(top, left[-1], 2 * size);
273 10029 : EXTEND(left, left[-1], 2 * size);
274 : }
275 : }
276 :
277 19926145 : if (!cand_left)
278 10377 : EXTEND(left, left[size], size);
279 19926145 : if (!cand_up_left) {
280 367251 : left[-1] = left[0];
281 : }
282 19926145 : if (!cand_up)
283 367253 : EXTEND(top, left[-1], size);
284 19926145 : if (!cand_up_right)
285 7161762 : EXTEND(top + size, top[size - 1], size);
286 :
287 19926145 : top[-1] = left[-1];
288 :
289 : // Filtering process
290 19926145 : if (!s->ps.sps->intra_smoothing_disabled_flag && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) {
291 9586830 : if (mode != INTRA_DC && size != 4){
292 3060686 : int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
293 3060686 : int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
294 : FFABS((int)(mode - 10U)));
295 3060686 : if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
296 1232216 : int threshold = 1 << (BIT_DEPTH - 5);
297 1232216 : if (s->ps.sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
298 140224 : log2_size == 5 &&
299 194030 : FFABS(top[-1] + top[63] - 2 * top[31]) < threshold &&
300 53806 : FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
301 : // We can't just overwrite values in top because it could be
302 : // a pointer into src
303 29137 : filtered_top[-1] = top[-1];
304 29137 : filtered_top[63] = top[63];
305 1864768 : for (i = 0; i < 63; i++)
306 5506893 : filtered_top[i] = ((64 - (i + 1)) * top[-1] +
307 3671262 : (i + 1) * top[63] + 32) >> 6;
308 1864768 : for (i = 0; i < 63; i++)
309 5506893 : left[i] = ((64 - (i + 1)) * left[-1] +
310 3671262 : (i + 1) * left[63] + 32) >> 6;
311 29137 : top = filtered_top;
312 : } else {
313 1203079 : filtered_left[2 * size - 1] = left[2 * size - 1];
314 1203079 : filtered_top[2 * size - 1] = top[2 * size - 1];
315 33914256 : for (i = 2 * size - 2; i >= 0; i--)
316 98133531 : filtered_left[i] = (left[i + 1] + 2 * left[i] +
317 65422354 : left[i - 1] + 2) >> 2;
318 2406158 : filtered_top[-1] =
319 2406158 : filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
320 33914256 : for (i = 2 * size - 2; i >= 0; i--)
321 98133531 : filtered_top[i] = (top[i + 1] + 2 * top[i] +
322 65422354 : top[i - 1] + 2) >> 2;
323 1203079 : left = filtered_left;
324 1203079 : top = filtered_top;
325 : }
326 : }
327 : }
328 : }
329 :
330 19926145 : switch (mode) {
331 4119026 : case INTRA_PLANAR:
332 4119026 : s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
333 : (uint8_t *)left, stride);
334 4119026 : break;
335 2435016 : case INTRA_DC:
336 2435016 : s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
337 : (uint8_t *)left, stride, log2_size, c_idx);
338 2435016 : break;
339 13372103 : default:
340 13372103 : s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
341 : (uint8_t *)left, stride, c_idx,
342 : mode);
343 13372103 : break;
344 : }
345 19926145 : }
346 :
347 : #define INTRA_PRED(size) \
348 : static void FUNC(intra_pred_ ## size)(HEVCContext *s, int x0, int y0, int c_idx) \
349 : { \
350 : FUNC(intra_pred)(s, x0, y0, size, c_idx); \
351 : }
352 :
353 14171370 : INTRA_PRED(2)
354 4122548 : INTRA_PRED(3)
355 1362700 : INTRA_PRED(4)
356 269527 : INTRA_PRED(5)
357 :
358 : #undef INTRA_PRED
359 :
360 4119026 : static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
361 : const uint8_t *_left, ptrdiff_t stride,
362 : int trafo_size)
363 : {
364 : int x, y;
365 4119026 : pixel *src = (pixel *)_src;
366 4119026 : const pixel *top = (const pixel *)_top;
367 4119026 : const pixel *left = (const pixel *)_left;
368 4119026 : int size = 1 << trafo_size;
369 30401626 : for (y = 0; y < size; y++)
370 293892232 : for (x = 0; x < size; x++)
371 802828896 : POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size] +
372 535219264 : (size - 1 - y) * top[x] + (y + 1) * left[size] + size) >> (trafo_size + 1);
373 4119026 : }
374 :
375 : #define PRED_PLANAR(size)\
376 : static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top, \
377 : const uint8_t *left, ptrdiff_t stride) \
378 : { \
379 : FUNC(pred_planar)(src, top, left, stride, size + 2); \
380 : }
381 :
382 2811018 : PRED_PLANAR(0)
383 887938 : PRED_PLANAR(1)
384 344201 : PRED_PLANAR(2)
385 75869 : PRED_PLANAR(3)
386 :
387 : #undef PRED_PLANAR
388 :
389 2435016 : static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
390 : const uint8_t *_left,
391 : ptrdiff_t stride, int log2_size, int c_idx)
392 : {
393 : int i, j, x, y;
394 2435016 : int size = (1 << log2_size);
395 2435016 : pixel *src = (pixel *)_src;
396 2435016 : const pixel *top = (const pixel *)_top;
397 2435016 : const pixel *left = (const pixel *)_left;
398 2435016 : int dc = size;
399 : pixel4 a;
400 19255928 : for (i = 0; i < size; i++)
401 16820912 : dc += left[i] + top[i];
402 :
403 2435016 : dc >>= log2_size + 1;
404 :
405 2435016 : a = PIXEL_SPLAT_X4(dc);
406 :
407 19255928 : for (i = 0; i < size; i++)
408 64194872 : for (j = 0; j < size; j+=4)
409 47373960 : AV_WN4P(&POS(j, i), a);
410 :
411 2435016 : if (c_idx == 0 && size < 32) {
412 872104 : POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
413 5759184 : for (x = 1; x < size; x++)
414 4887080 : POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;
415 5759184 : for (y = 1; y < size; y++)
416 4887080 : POS(0, y) = (left[y] + 3 * dc + 2) >> 2;
417 : }
418 2435016 : }
419 :
420 13372103 : static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
421 : const uint8_t *_top,
422 : const uint8_t *_left,
423 : ptrdiff_t stride, int c_idx,
424 : int mode, int size)
425 : {
426 : int x, y;
427 13372103 : pixel *src = (pixel *)_src;
428 13372103 : const pixel *top = (const pixel *)_top;
429 13372103 : const pixel *left = (const pixel *)_left;
430 :
431 : static const int intra_pred_angle[] = {
432 : 32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
433 : -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32
434 : };
435 : static const int inv_angle[] = {
436 : -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
437 : -630, -910, -1638, -4096
438 : };
439 :
440 13372103 : int angle = intra_pred_angle[mode - 2];
441 : pixel ref_array[3 * MAX_TB_SIZE + 4];
442 13372103 : pixel *ref_tmp = ref_array + size;
443 : const pixel *ref;
444 13372103 : int last = (size * angle) >> 5;
445 :
446 13372103 : if (mode >= 18) {
447 7275426 : ref = top - 1;
448 7275426 : if (angle < 0 && last < -1) {
449 6206936 : for (x = 0; x <= size; x += 4)
450 4419809 : AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
451 8317294 : for (x = last; x <= -1; x++)
452 6530167 : ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
453 1787127 : ref = ref_tmp;
454 : }
455 :
456 48772122 : for (y = 0; y < size; y++) {
457 41496696 : int idx = ((y + 1) * angle) >> 5;
458 41496696 : int fact = ((y + 1) * angle) & 31;
459 41496696 : if (fact) {
460 75805383 : for (x = 0; x < size; x += 4) {
461 152702904 : POS(x , y) = ((32 - fact) * ref[x + idx + 1] +
462 101801936 : fact * ref[x + idx + 2] + 16) >> 5;
463 152702904 : POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
464 101801936 : fact * ref[x + 1 + idx + 2] + 16) >> 5;
465 152702904 : POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
466 101801936 : fact * ref[x + 2 + idx + 2] + 16) >> 5;
467 152702904 : POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
468 101801936 : fact * ref[x + 3 + idx + 2] + 16) >> 5;
469 : }
470 : } else {
471 52667073 : for (x = 0; x < size; x += 4)
472 36074792 : AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
473 : }
474 : }
475 7275426 : if (mode == 26 && c_idx == 0 && size < 32) {
476 5707713 : for (y = 0; y < size; y++)
477 4889028 : POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
478 : }
479 : } else {
480 6096677 : ref = left - 1;
481 6096677 : if (angle < 0 && last < -1) {
482 4150898 : for (x = 0; x <= size; x += 4)
483 2968790 : AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
484 5198156 : for (x = last; x <= -1; x++)
485 4016048 : ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
486 1182108 : ref = ref_tmp;
487 : }
488 :
489 41590397 : for (x = 0; x < size; x++) {
490 35493720 : int idx = ((x + 1) * angle) >> 5;
491 35493720 : int fact = ((x + 1) * angle) & 31;
492 35493720 : if (fact) {
493 246216444 : for (y = 0; y < size; y++) {
494 662203632 : POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
495 441469088 : fact * ref[y + idx + 2] + 16) >> 5;
496 : }
497 : } else {
498 99700604 : for (y = 0; y < size; y++)
499 89688784 : POS(x, y) = ref[y + idx + 1];
500 : }
501 : }
502 6096677 : if (mode == 10 && c_idx == 0 && size < 32) {
503 1045480 : for (x = 0; x < size; x += 4) {
504 622514 : POS(x, 0) = av_clip_pixel(left[0] + ((top[x ] - top[-1]) >> 1));
505 622514 : POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
506 622514 : POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
507 622514 : POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
508 : }
509 : }
510 : }
511 13372103 : }
512 :
513 9811730 : static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
514 : const uint8_t *left,
515 : ptrdiff_t stride, int c_idx, int mode)
516 : {
517 9811730 : FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 2);
518 9811730 : }
519 :
520 2669593 : static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
521 : const uint8_t *left,
522 : ptrdiff_t stride, int c_idx, int mode)
523 : {
524 2669593 : FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 3);
525 2669593 : }
526 :
527 757388 : static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
528 : const uint8_t *left,
529 : ptrdiff_t stride, int c_idx, int mode)
530 : {
531 757388 : FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 4);
532 757388 : }
533 :
534 133392 : static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
535 : const uint8_t *left,
536 : ptrdiff_t stride, int c_idx, int mode)
537 : {
538 133392 : FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 5);
539 133392 : }
540 :
541 : #undef EXTEND_LEFT_CIP
542 : #undef EXTEND_RIGHT_CIP
543 : #undef EXTEND_UP_CIP
544 : #undef EXTEND_DOWN_CIP
545 : #undef IS_INTRA
546 : #undef MVF_PU
547 : #undef MVF
548 : #undef PU
549 : #undef EXTEND
550 : #undef MIN_TB_ADDR_ZS
551 : #undef POS
|