FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vp9recon.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 276 290 95.2%
Functions: 11 11 100.0%
Branches: 297 322 92.2%

Line Branch Exec Source
1 /*
2 * VP9 compatible video decoder
3 *
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "libavutil/avassert.h"
25 #include "libavutil/frame.h"
26 #include "libavutil/mem_internal.h"
27
28 #include "progressframe.h"
29 #include "videodsp.h"
30 #include "vp9data.h"
31 #include "vp9dec.h"
32
33 1545101 static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a,
34 uint8_t *dst_edge, ptrdiff_t stride_edge,
35 uint8_t *dst_inner, ptrdiff_t stride_inner,
36 uint8_t *l, int col, int x, int w,
37 int row, int y, enum TxfmMode tx,
38 int p, int ss_h, int ss_v, int bytesperpixel)
39 {
40 1545101 const VP9Context *s = td->s;
41
4/4
✓ Branch 0 taken 59104 times.
✓ Branch 1 taken 1485997 times.
✓ Branch 2 taken 17485 times.
✓ Branch 3 taken 41619 times.
1545101 int have_top = row > 0 || y > 0;
42
4/4
✓ Branch 0 taken 54127 times.
✓ Branch 1 taken 1490974 times.
✓ Branch 2 taken 17792 times.
✓ Branch 3 taken 36335 times.
1545101 int have_left = col > td->tile_col_start || x > 0;
43 1545101 int have_right = x < w - 1;
44 1545101 int bpp = s->s.h.bpp;
45 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
46 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
47 { DC_127_PRED, VERT_PRED } },
48 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
49 { HOR_PRED, HOR_PRED } },
50 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
51 { LEFT_DC_PRED, DC_PRED } },
52 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
53 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
54 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
55 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
56 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
57 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
58 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
59 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
60 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
61 { DC_127_PRED, VERT_LEFT_PRED } },
62 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
63 { HOR_UP_PRED, HOR_UP_PRED } },
64 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
65 { HOR_PRED, TM_VP8_PRED } },
66 };
67 static const struct {
68 uint8_t needs_left:1;
69 uint8_t needs_top:1;
70 uint8_t needs_topleft:1;
71 uint8_t needs_topright:1;
72 uint8_t invert_left:1;
73 } edges[N_INTRA_PRED_MODES] = {
74 [VERT_PRED] = { .needs_top = 1 },
75 [HOR_PRED] = { .needs_left = 1 },
76 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
77 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
78 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79 .needs_topleft = 1 },
80 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
81 .needs_topleft = 1 },
82 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
83 .needs_topleft = 1 },
84 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
85 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
86 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
87 .needs_topleft = 1 },
88 [LEFT_DC_PRED] = { .needs_left = 1 },
89 [TOP_DC_PRED] = { .needs_top = 1 },
90 [DC_128_PRED] = { 0 },
91 [DC_127_PRED] = { 0 },
92 [DC_129_PRED] = { 0 }
93 };
94
95 av_assert2(mode >= 0 && mode < 10);
96 1545101 mode = mode_conv[mode][have_left][have_top];
97
2/2
✓ Branch 0 taken 1087383 times.
✓ Branch 1 taken 457718 times.
1545101 if (edges[mode].needs_top) {
98 uint8_t *top, *topleft;
99 1087383 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
100 1087383 int n_px_need_tr = 0;
101
102
6/6
✓ Branch 0 taken 795515 times.
✓ Branch 1 taken 291868 times.
✓ Branch 2 taken 57929 times.
✓ Branch 3 taken 737586 times.
✓ Branch 4 taken 22995 times.
✓ Branch 5 taken 34934 times.
1087383 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
103 22995 n_px_need_tr = 4;
104
105 // if top of sb64-row, use s->intra_pred_data[] instead of
106 // dst[-stride] for intra prediction (it contains pre- instead of
107 // post-loopfilter data)
108
2/2
✓ Branch 0 taken 1083048 times.
✓ Branch 1 taken 4335 times.
1087383 if (have_top) {
109
2/2
✓ Branch 0 taken 120295 times.
✓ Branch 1 taken 104166 times.
224461 top = !(row & 7) && !y ?
110
2/2
✓ Branch 0 taken 224461 times.
✓ Branch 1 taken 858587 times.
2166096 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
111
2/2
✓ Branch 0 taken 648282 times.
✓ Branch 1 taken 314471 times.
962753 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
112
2/2
✓ Branch 0 taken 1050997 times.
✓ Branch 1 taken 32051 times.
1083048 if (have_left)
113
2/2
✓ Branch 0 taken 114624 times.
✓ Branch 1 taken 101678 times.
1267299 topleft = !(row & 7) && !y ?
114
2/2
✓ Branch 0 taken 216302 times.
✓ Branch 1 taken 834695 times.
2101994 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
115
4/4
✓ Branch 0 taken 307591 times.
✓ Branch 1 taken 628782 times.
✓ Branch 2 taken 137731 times.
✓ Branch 3 taken 169860 times.
936373 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
116 169860 &dst_inner[-stride_inner];
117 }
118
119
2/2
✓ Branch 0 taken 1083048 times.
✓ Branch 1 taken 4335 times.
1087383 if (have_top &&
120
8/8
✓ Branch 0 taken 269669 times.
✓ Branch 1 taken 813379 times.
✓ Branch 2 taken 266887 times.
✓ Branch 3 taken 2782 times.
✓ Branch 4 taken 266872 times.
✓ Branch 5 taken 15 times.
✓ Branch 6 taken 789266 times.
✓ Branch 7 taken 290985 times.
1083048 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
121
4/4
✓ Branch 0 taken 57929 times.
✓ Branch 1 taken 731337 times.
✓ Branch 2 taken 22995 times.
✓ Branch 3 taken 34934 times.
789266 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
122
2/2
✓ Branch 0 taken 1045163 times.
✓ Branch 1 taken 154 times.
1045317 n_px_need + n_px_need_tr <= n_px_have) {
123 1045163 *a = top;
124 } else {
125
2/2
✓ Branch 0 taken 37885 times.
✓ Branch 1 taken 4335 times.
42220 if (have_top) {
126
2/2
✓ Branch 0 taken 37747 times.
✓ Branch 1 taken 138 times.
37885 if (n_px_need <= n_px_have) {
127 37747 memcpy(*a, top, n_px_need * bytesperpixel);
128 } else {
129 #define memset_bpp(c, i1, v, i2, num) do { \
130 if (bytesperpixel == 1) { \
131 memset(&(c)[(i1)], (v)[(i2)], (num)); \
132 } else { \
133 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
134 for (n = 0; n < (num); n++) { \
135 AV_WN16A(&(c)[((i1) + n) * 2], val); \
136 } \
137 } \
138 } while (0)
139 138 memcpy(*a, top, n_px_have * bytesperpixel);
140
1/4
✓ Branch 0 taken 138 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
138 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
141 }
142 } else {
143 #define memset_val(c, val, num) do { \
144 if (bytesperpixel == 1) { \
145 memset((c), (val), (num)); \
146 } else { \
147 int n; \
148 for (n = 0; n < (num); n++) { \
149 AV_WN16A(&(c)[n * 2], (val)); \
150 } \
151 } \
152 } while (0)
153
4/4
✓ Branch 0 taken 4127 times.
✓ Branch 1 taken 208 times.
✓ Branch 2 taken 848 times.
✓ Branch 3 taken 208 times.
5183 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
154 }
155
2/2
✓ Branch 0 taken 7168 times.
✓ Branch 1 taken 35052 times.
42220 if (edges[mode].needs_topleft) {
156
4/4
✓ Branch 0 taken 4137 times.
✓ Branch 1 taken 3031 times.
✓ Branch 2 taken 51 times.
✓ Branch 3 taken 4086 times.
7168 if (have_left && have_top) {
157 #define assign_bpp(c, i1, v, i2) do { \
158 if (bytesperpixel == 1) { \
159 (c)[(i1)] = (v)[(i2)]; \
160 } else { \
161 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
162 } \
163 } while (0)
164
1/2
✓ Branch 0 taken 51 times.
✗ Branch 1 not taken.
51 assign_bpp(*a, -1, topleft, -1);
165 } else {
166 #define assign_val(c, i, v) do { \
167 if (bytesperpixel == 1) { \
168 (c)[(i)] = (v); \
169 } else { \
170 AV_WN16A(&(c)[(i) * 2], (v)); \
171 } \
172 } while (0)
173
6/6
✓ Branch 0 taken 6818 times.
✓ Branch 1 taken 299 times.
✓ Branch 2 taken 2691 times.
✓ Branch 3 taken 4127 times.
✓ Branch 4 taken 91 times.
✓ Branch 5 taken 208 times.
7117 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
174 }
175 }
176
4/4
✓ Branch 0 taken 41199 times.
✓ Branch 1 taken 1021 times.
✓ Branch 2 taken 34950 times.
✓ Branch 3 taken 6249 times.
42220 if (tx == TX_4X4 && edges[mode].needs_topright) {
177
3/4
✓ Branch 0 taken 34950 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 34934 times.
34950 if (have_top && have_right &&
178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 n_px_need + n_px_need_tr <= n_px_have) {
179 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
180 } else {
181
4/4
✓ Branch 0 taken 33220 times.
✓ Branch 1 taken 1730 times.
✓ Branch 2 taken 6920 times.
✓ Branch 3 taken 1730 times.
41870 memset_bpp(*a, 4, *a, 3, 4);
182 }
183 }
184 }
185 }
186
2/2
✓ Branch 0 taken 1275702 times.
✓ Branch 1 taken 269399 times.
1545101 if (edges[mode].needs_left) {
187
2/2
✓ Branch 0 taken 1272671 times.
✓ Branch 1 taken 3031 times.
1275702 if (have_left) {
188 1272671 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
189
2/2
✓ Branch 0 taken 866887 times.
✓ Branch 1 taken 405784 times.
1272671 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
190
2/2
✓ Branch 0 taken 866887 times.
✓ Branch 1 taken 405784 times.
1272671 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
191
192
2/2
✓ Branch 0 taken 63656 times.
✓ Branch 1 taken 1209015 times.
1272671 if (edges[mode].invert_left) {
193
2/2
✓ Branch 0 taken 63648 times.
✓ Branch 1 taken 8 times.
63656 if (n_px_need <= n_px_have) {
194
2/2
✓ Branch 0 taken 319740 times.
✓ Branch 1 taken 63648 times.
383388 for (i = 0; i < n_px_need; i++)
195
2/2
✓ Branch 0 taken 306388 times.
✓ Branch 1 taken 13352 times.
319740 assign_bpp(l, i, &dst[i * stride], -1);
196 } else {
197
2/2
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 8 times.
104 for (i = 0; i < n_px_have; i++)
198
1/2
✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
96 assign_bpp(l, i, &dst[i * stride], -1);
199
1/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
8 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
200 }
201 } else {
202
2/2
✓ Branch 0 taken 1207145 times.
✓ Branch 1 taken 1870 times.
1209015 if (n_px_need <= n_px_have) {
203
2/2
✓ Branch 0 taken 6894840 times.
✓ Branch 1 taken 1207145 times.
8101985 for (i = 0; i < n_px_need; i++)
204
2/2
✓ Branch 0 taken 6261936 times.
✓ Branch 1 taken 632904 times.
6894840 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
205 } else {
206
2/2
✓ Branch 0 taken 17544 times.
✓ Branch 1 taken 1870 times.
19414 for (i = 0; i < n_px_have; i++)
207
2/2
✓ Branch 0 taken 15048 times.
✓ Branch 1 taken 2496 times.
17544 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
208
4/4
✓ Branch 0 taken 1636 times.
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 2496 times.
✓ Branch 3 taken 234 times.
4366 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
209 }
210 }
211 } else {
212
4/4
✓ Branch 0 taken 2930 times.
✓ Branch 1 taken 101 times.
✓ Branch 2 taken 852 times.
✓ Branch 3 taken 101 times.
3883 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
213 }
214 }
215
216 1545101 return mode;
217 }
218
219 286516 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
220 ptrdiff_t uv_off, int bytesperpixel)
221 {
222 286516 const VP9Context *s = td->s;
223 286516 VP9Block *b = td->b;
224 286516 int row = td->row, col = td->col;
225 286516 int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
226 286516 int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
227 286516 int end_x = FFMIN(2 * (s->cols - col), w4);
228 286516 int end_y = FFMIN(2 * (s->rows - row), h4);
229 286516 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
230 286516 int uvstep1d = 1 << b->uvtx, p;
231 286516 uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
232 286516 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
233 286516 LOCAL_ALIGNED_32(uint8_t, l, [64]);
234
235
2/2
✓ Branch 0 taken 447712 times.
✓ Branch 1 taken 286516 times.
734228 for (n = 0, y = 0; y < end_y; y += step1d) {
236 447712 uint8_t *ptr = dst, *ptr_r = dst_r;
237
2/2
✓ Branch 0 taken 838725 times.
✓ Branch 1 taken 447712 times.
1286437 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
238 838725 ptr_r += 4 * step1d * bytesperpixel, n += step) {
239
1/2
✓ Branch 0 taken 345912 times.
✗ Branch 1 not taken.
345912 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
240
2/2
✓ Branch 0 taken 345912 times.
✓ Branch 1 taken 492813 times.
1184637 y * 2 + x : 0];
241 838725 uint8_t *a = &a_buf[32];
242 838725 enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
243
4/4
✓ Branch 0 taken 724896 times.
✓ Branch 1 taken 113829 times.
✓ Branch 2 taken 42503 times.
✓ Branch 3 taken 682393 times.
838725 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
244
245 838725 mode = check_intra_mode(td, mode, &a, ptr_r,
246 838725 s->s.frames[CUR_FRAME].tf.f->linesize[0],
247 ptr, td->y_stride, l,
248 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
249 838725 s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
250
2/2
✓ Branch 0 taken 575350 times.
✓ Branch 1 taken 263375 times.
838725 if (eob)
251 575350 s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
252 575350 td->block + 16 * n * bytesperpixel, eob);
253 }
254 447712 dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
255 447712 dst += 4 * step1d * td->y_stride;
256 }
257
258 // U/V
259 286516 w4 >>= s->ss_h;
260 286516 end_x >>= s->ss_h;
261 286516 end_y >>= s->ss_v;
262 286516 step = 1 << (b->uvtx * 2);
263
2/2
✓ Branch 0 taken 573032 times.
✓ Branch 1 taken 286516 times.
859548 for (p = 0; p < 2; p++) {
264 573032 dst = td->dst[1 + p];
265 573032 dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
266
2/2
✓ Branch 0 taken 617032 times.
✓ Branch 1 taken 573032 times.
1190064 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
267 617032 uint8_t *ptr = dst, *ptr_r = dst_r;
268
2/2
✓ Branch 0 taken 706376 times.
✓ Branch 1 taken 617032 times.
1323408 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
269 706376 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
270 706376 int mode = b->uvmode;
271 706376 uint8_t *a = &a_buf[32];
272
4/4
✓ Branch 0 taken 583478 times.
✓ Branch 1 taken 122898 times.
✓ Branch 2 taken 16110 times.
✓ Branch 3 taken 567368 times.
706376 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
273
274 706376 mode = check_intra_mode(td, mode, &a, ptr_r,
275 706376 s->s.frames[CUR_FRAME].tf.f->linesize[1],
276 ptr, td->uv_stride, l, col, x, w4, row, y,
277 706376 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
278 706376 s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
279
2/2
✓ Branch 0 taken 234008 times.
✓ Branch 1 taken 472368 times.
706376 if (eob)
280 234008 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
281 234008 td->uvblock[p] + 16 * n * bytesperpixel, eob);
282 }
283 617032 dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
284 617032 dst += 4 * uvstep1d * td->uv_stride;
285 }
286 }
287 286516 }
288
289 262487 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
290 {
291 262487 intra_recon(td, y_off, uv_off, 1);
292 262487 }
293
294 24029 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
295 {
296 24029 intra_recon(td, y_off, uv_off, 2);
297 24029 }
298
299 642439 static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
300 uint8_t *dst, ptrdiff_t dst_stride,
301 const uint8_t *ref, ptrdiff_t ref_stride,
302 const ProgressFrame *ref_frame,
303 ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
304 int bw, int bh, int w, int h, int bytesperpixel)
305 {
306 642439 const VP9Context *s = td->s;
307 642439 int mx = mv->x, my = mv->y, th;
308
309 642439 y += my >> 3;
310 642439 x += mx >> 3;
311 642439 ref += y * ref_stride + x * bytesperpixel;
312 642439 mx &= 7;
313 642439 my &= 7;
314 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
315 // we use +7 because the last 7 pixels of each sbrow can be changed in
316 // the longest loopfilter of the next sbrow
317
2/2
✓ Branch 0 taken 439226 times.
✓ Branch 1 taken 203213 times.
642439 th = (y + bh + 4 * !!my + 7) >> 6;
318 642439 ff_progress_frame_await(ref_frame, FFMAX(th, 0));
319 // The arm/aarch64 _hv filters read one more row than what actually is
320 // needed, so switch to emulated edge one pixel sooner vertically
321 // (!!my * 5) than horizontally (!!mx * 4).
322 // The arm/aarch64 _h filters read one more pixel than what actually is
323 // needed, so switch to emulated edge if that would read beyond the bottom
324 // right block.
325
8/8
✓ Branch 0 taken 467327 times.
✓ Branch 1 taken 175112 times.
✓ Branch 2 taken 637433 times.
✓ Branch 3 taken 5006 times.
✓ Branch 4 taken 436615 times.
✓ Branch 5 taken 200818 times.
✓ Branch 6 taken 620001 times.
✓ Branch 7 taken 17432 times.
642439 if (x < !!mx * 3 || y < !!my * 3 ||
326 620001 ((ARCH_AARCH64 || ARCH_ARM) && (x + !!mx * 5 > w - bw) && (y + !!my * 5 + 1 > h - bh)) ||
327
8/8
✓ Branch 0 taken 448749 times.
✓ Branch 1 taken 171252 times.
✓ Branch 2 taken 593770 times.
✓ Branch 3 taken 26231 times.
✓ Branch 4 taken 404486 times.
✓ Branch 5 taken 189284 times.
✓ Branch 6 taken 20872 times.
✓ Branch 7 taken 572898 times.
620001 x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
328 417246 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
329
4/4
✓ Branch 0 taken 48794 times.
✓ Branch 1 taken 20747 times.
✓ Branch 2 taken 50515 times.
✓ Branch 3 taken 19026 times.
69541 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
330 160, ref_stride,
331
2/2
✓ Branch 0 taken 50515 times.
✓ Branch 1 taken 19026 times.
69541 bw + !!mx * 7, bh + !!my * 7,
332
6/6
✓ Branch 0 taken 48794 times.
✓ Branch 1 taken 20747 times.
✓ Branch 2 taken 50515 times.
✓ Branch 3 taken 19026 times.
✓ Branch 4 taken 48794 times.
✓ Branch 5 taken 20747 times.
69541 x - !!mx * 3, y - !!my * 3, w, h);
333
4/4
✓ Branch 0 taken 48794 times.
✓ Branch 1 taken 20747 times.
✓ Branch 2 taken 50515 times.
✓ Branch 3 taken 19026 times.
69541 ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
334 69541 ref_stride = 160;
335 }
336
2/2
✓ Branch 0 taken 467327 times.
✓ Branch 1 taken 175112 times.
642439 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
337 642439 }
338
339 561917 static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
340 uint8_t *dst_u, uint8_t *dst_v,
341 ptrdiff_t dst_stride,
342 const uint8_t *ref_u, ptrdiff_t src_stride_u,
343 const uint8_t *ref_v, ptrdiff_t src_stride_v,
344 const ProgressFrame *ref_frame,
345 ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
346 int bw, int bh, int w, int h, int bytesperpixel)
347 {
348 561917 const VP9Context *s = td->s;
349
4/4
✓ Branch 0 taken 20366 times.
✓ Branch 1 taken 541551 times.
✓ Branch 2 taken 16495 times.
✓ Branch 3 taken 545422 times.
561917 int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
350
351 561917 y += my >> 4;
352 561917 x += mx >> 4;
353 561917 ref_u += y * src_stride_u + x * bytesperpixel;
354 561917 ref_v += y * src_stride_v + x * bytesperpixel;
355 561917 mx &= 15;
356 561917 my &= 15;
357 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
358 // we use +7 because the last 7 pixels of each sbrow can be changed in
359 // the longest loopfilter of the next sbrow
360
2/2
✓ Branch 0 taken 437482 times.
✓ Branch 1 taken 124435 times.
561917 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
361 561917 ff_progress_frame_await(ref_frame, FFMAX(th, 0));
362 // The arm/aarch64 _hv filters read one more row than what actually is
363 // needed, so switch to emulated edge one pixel sooner vertically
364 // (!!my * 5) than horizontally (!!mx * 4).
365 // The arm/aarch64 _h filters read one more pixel than what actually is
366 // needed, so switch to emulated edge if that would read beyond the bottom
367 // right block.
368
8/8
✓ Branch 0 taken 464134 times.
✓ Branch 1 taken 97783 times.
✓ Branch 2 taken 549715 times.
✓ Branch 3 taken 12202 times.
✓ Branch 4 taken 428439 times.
✓ Branch 5 taken 121276 times.
✓ Branch 6 taken 532220 times.
✓ Branch 7 taken 17495 times.
561917 if (x < !!mx * 3 || y < !!my * 3 ||
369 532220 ((ARCH_AARCH64 || ARCH_ARM) && (x + !!mx * 5 > w - bw) && (y + !!my * 5 + 1 > h - bh)) ||
370
8/8
✓ Branch 0 taken 436501 times.
✓ Branch 1 taken 95719 times.
✓ Branch 2 taken 501373 times.
✓ Branch 3 taken 30847 times.
✓ Branch 4 taken 389197 times.
✓ Branch 5 taken 112176 times.
✓ Branch 6 taken 25025 times.
✓ Branch 7 taken 476348 times.
532220 x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
371 513414 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
372
4/4
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
85569 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
373 160, src_stride_u,
374
2/2
✓ Branch 0 taken 71537 times.
✓ Branch 1 taken 14032 times.
85569 bw + !!mx * 7, bh + !!my * 7,
375
6/6
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
✓ Branch 4 taken 68164 times.
✓ Branch 5 taken 17405 times.
85569 x - !!mx * 3, y - !!my * 3, w, h);
376
4/4
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
85569 ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
377
2/2
✓ Branch 0 taken 71537 times.
✓ Branch 1 taken 14032 times.
85569 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
378
379 513414 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
380
4/4
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
85569 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
381 160, src_stride_v,
382
2/2
✓ Branch 0 taken 71537 times.
✓ Branch 1 taken 14032 times.
85569 bw + !!mx * 7, bh + !!my * 7,
383
6/6
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
✓ Branch 4 taken 68164 times.
✓ Branch 5 taken 17405 times.
85569 x - !!mx * 3, y - !!my * 3, w, h);
384
4/4
✓ Branch 0 taken 68164 times.
✓ Branch 1 taken 17405 times.
✓ Branch 2 taken 71537 times.
✓ Branch 3 taken 14032 times.
85569 ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
385
2/2
✓ Branch 0 taken 71537 times.
✓ Branch 1 taken 14032 times.
85569 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
386 } else {
387
2/2
✓ Branch 0 taken 392597 times.
✓ Branch 1 taken 83751 times.
476348 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
388
2/2
✓ Branch 0 taken 392597 times.
✓ Branch 1 taken 83751 times.
476348 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
389 }
390 561917 }
391
392 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
393 px, py, pw, ph, bw, bh, w, h, i) \
394 mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
395 mv, bw, bh, w, h, bytesperpixel)
396 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
397 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
398 mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
399 row, col, mv, bw, bh, w, h, bytesperpixel)
400 #define SCALED 0
401 #define FN(x) x##_8bpp
402 #define BYTES_PER_PIXEL 1
403 #include "vp9_mc_template.c"
404 #undef FN
405 #undef BYTES_PER_PIXEL
406 #define FN(x) x##_16bpp
407 #define BYTES_PER_PIXEL 2
408 #include "vp9_mc_template.c"
409 #undef mc_luma_dir
410 #undef mc_chroma_dir
411 #undef FN
412 #undef BYTES_PER_PIXEL
413 #undef SCALED
414
415 689 static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
416 const vp9_mc_func (*mc)[2],
417 uint8_t *dst, ptrdiff_t dst_stride,
418 const uint8_t *ref, ptrdiff_t ref_stride,
419 const ProgressFrame *ref_frame,
420 ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
421 int px, int py, int pw, int ph,
422 int bw, int bh, int w, int h, int bytesperpixel,
423 const uint16_t *scale, const uint8_t *step)
424 {
425 689 const VP9Context *s = td->s;
426
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 689 times.
689 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
427 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
428 mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
429 y, x, in_mv, bw, bh, w, h, bytesperpixel);
430 } else {
431 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
432 int mx, my;
433 int refbw_m1, refbh_m1;
434 int th;
435 VP9mv mv;
436
437 689 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
438 689 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
439 // BUG libvpx seems to scale the two components separately. This introduces
440 // rounding errors but we have to reproduce them to be exactly compatible
441 // with the output from libvpx...
442 689 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
443 689 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
444
445 689 y = my >> 4;
446 689 x = mx >> 4;
447 689 ref += y * ref_stride + x * bytesperpixel;
448 689 mx &= 15;
449 689 my &= 15;
450 689 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
451 689 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
452 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
453 // we use +7 because the last 7 pixels of each sbrow can be changed in
454 // the longest loopfilter of the next sbrow
455 689 th = (y + refbh_m1 + 4 + 7) >> 6;
456 689 ff_progress_frame_await(ref_frame, FFMAX(th, 0));
457 // The arm/aarch64 _hv filters read one more row than what actually is
458 // needed, so switch to emulated edge one pixel sooner vertically
459 // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
460
8/8
✓ Branch 0 taken 649 times.
✓ Branch 1 taken 40 times.
✓ Branch 2 taken 607 times.
✓ Branch 3 taken 42 times.
✓ Branch 4 taken 586 times.
✓ Branch 5 taken 21 times.
✓ Branch 6 taken 31 times.
✓ Branch 7 taken 555 times.
689 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
461 134 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
462 134 ref - 3 * ref_stride - 3 * bytesperpixel,
463 288, ref_stride,
464 refbw_m1 + 8, refbh_m1 + 8,
465 134 x - 3, y - 3, w, h);
466 134 ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
467 134 ref_stride = 288;
468 }
469 689 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
470 }
471 689 }
472
473 689 static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc,
474 const vp9_mc_func (*mc)[2],
475 uint8_t *dst_u, uint8_t *dst_v,
476 ptrdiff_t dst_stride,
477 const uint8_t *ref_u, ptrdiff_t src_stride_u,
478 const uint8_t *ref_v, ptrdiff_t src_stride_v,
479 const ProgressFrame *ref_frame,
480 ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
481 int px, int py, int pw, int ph,
482 int bw, int bh, int w, int h, int bytesperpixel,
483 const uint16_t *scale, const uint8_t *step)
484 {
485 689 const VP9Context *s = td->s;
486
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 689 times.
689 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
487 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
488 mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
489 ref_v, src_stride_v, ref_frame,
490 y, x, in_mv, bw, bh, w, h, bytesperpixel);
491 } else {
492 int mx, my;
493 int refbw_m1, refbh_m1;
494 int th;
495 VP9mv mv;
496
497
1/2
✓ Branch 0 taken 689 times.
✗ Branch 1 not taken.
689 if (s->ss_h) {
498 // BUG https://code.google.com/p/webm/issues/detail?id=820
499 689 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
500 689 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
501 } else {
502 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
503 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
504 }
505
1/2
✓ Branch 0 taken 689 times.
✗ Branch 1 not taken.
689 if (s->ss_v) {
506 // BUG https://code.google.com/p/webm/issues/detail?id=820
507 689 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
508 689 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
509 } else {
510 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
511 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
512 }
513 #undef scale_mv
514 689 y = my >> 4;
515 689 x = mx >> 4;
516 689 ref_u += y * src_stride_u + x * bytesperpixel;
517 689 ref_v += y * src_stride_v + x * bytesperpixel;
518 689 mx &= 15;
519 689 my &= 15;
520 689 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
521 689 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
522 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
523 // we use +7 because the last 7 pixels of each sbrow can be changed in
524 // the longest loopfilter of the next sbrow
525 689 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
526 689 ff_progress_frame_await(ref_frame, FFMAX(th, 0));
527 // The arm/aarch64 _hv filters read one more row than what actually is
528 // needed, so switch to emulated edge one pixel sooner vertically
529 // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
530
8/8
✓ Branch 0 taken 633 times.
✓ Branch 1 taken 56 times.
✓ Branch 2 taken 583 times.
✓ Branch 3 taken 50 times.
✓ Branch 4 taken 553 times.
✓ Branch 5 taken 30 times.
✓ Branch 6 taken 41 times.
✓ Branch 7 taken 512 times.
689 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
531 177 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
532 177 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
533 288, src_stride_u,
534 refbw_m1 + 8, refbh_m1 + 8,
535 177 x - 3, y - 3, w, h);
536 177 ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
537 177 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
538
539 177 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
540 177 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
541 288, src_stride_v,
542 refbw_m1 + 8, refbh_m1 + 8,
543 177 x - 3, y - 3, w, h);
544 177 ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
545 177 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
546 } else {
547 512 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
548 512 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
549 }
550 }
551 689 }
552
/*
 * Scaled-reference instantiation of the motion compensation template.
 *
 * mc_luma_dir() and mc_chroma_dir() forward to mc_luma_scaled() and
 * mc_chroma_scaled(), passing both the s->dsp.s<mc> and the s->dsp.mc
 * function tables together with the mvscale/mvstep entries of reference
 * b->ref[i]. The macros are not self-contained: they use s, b and
 * bytesperpixel from the scope in which they are expanded.
 */
553 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
554 px, py, pw, ph, bw, bh, w, h, i) \
555 mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
556 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
557 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
558 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
559 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
560 mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
561 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
562 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
/*
 * The template is included twice, once with BYTES_PER_PIXEL 1 and once with
 * BYTES_PER_PIXEL 2. FN() appends _scaled_8bpp / _scaled_16bpp to a name;
 * presumably the template uses it to name the functions it defines
 * (inter_recon() calls inter_pred_scaled_8bpp() and
 * inter_pred_scaled_16bpp()) -- confirm against vp9_mc_template.c.
 */
563 #define SCALED 1
564 #define FN(x) x##_scaled_8bpp
565 #define BYTES_PER_PIXEL 1
566 #include "vp9_mc_template.c"
567 #undef FN
568 #undef BYTES_PER_PIXEL
569 #define FN(x) x##_scaled_16bpp
570 #define BYTES_PER_PIXEL 2
571 #include "vp9_mc_template.c"
/* Drop every helper macro so nothing leaks into the code that follows. */
572 #undef mc_luma_dir
573 #undef mc_chroma_dir
574 #undef FN
575 #undef BYTES_PER_PIXEL
576 #undef SCALED
577
/**
 * Reconstruct the current inter block of a tile.
 *
 * Runs inter prediction through one of the inter_pred_* functions (scaled or
 * unscaled, 8bpp or 16bpp) and then, unless the block has its skip flag set,
 * performs the "itxfm add" step on the luma plane and on both chroma planes.
 *
 * @param td            tile data; supplies the decoder context (td->s), the
 *                      current block (td->b), its position (td->row,
 *                      td->col), the destination pointers and strides, and
 *                      the eob/coefficient buffers
 * @param bytesperpixel 1 selects the *_8bpp prediction functions, any other
 *                      value the *_16bpp ones; it also scales the
 *                      destination and coefficient pointer offsets
 */
578 530281 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
579 {
580 530281 const VP9Context *s = td->s;
581 530281 VP9Block *b = td->b;
582 530281 int row = td->row, col = td->col;
583
/*
 * Refuse to predict from a reference whose scale is REF_INVALID_SCALE. Only
 * ref[1] is checked in addition to ref[0] when b->comp is set. The error code
 * is stored and the message logged only if no error was recorded before.
 * NOTE(review): the flag is written through s->td, not through the td
 * argument -- confirm that this is the intended tile data instance.
 */
584
1/2
✓ Branch 0 taken 530281 times.
✗ Branch 1 not taken.
530281 if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
585
3/4
✓ Branch 0 taken 27251 times.
✓ Branch 1 taken 503030 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 27251 times.
530281 (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
586 if (!s->td->error_info) {
587 s->td->error_info = AVERROR_INVALIDDATA;
588 av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
589 "reference frame has invalid dimensions\n");
590 }
591 return;
592 }
593
/*
 * A non-zero mvscale entry for any reference in use selects the scaled
 * prediction functions; otherwise the unscaled ones are called.
 */
594
5/6
✓ Branch 0 taken 529592 times.
✓ Branch 1 taken 689 times.
✓ Branch 2 taken 27251 times.
✓ Branch 3 taken 502341 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 27251 times.
530281 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
595
1/2
✓ Branch 0 taken 689 times.
✗ Branch 1 not taken.
689 if (bytesperpixel == 1) {
596 689 inter_pred_scaled_8bpp(td);
597 } else {
598 inter_pred_scaled_16bpp(td);
599 }
600 } else {
601
2/2
✓ Branch 0 taken 513805 times.
✓ Branch 1 taken 15787 times.
529592 if (bytesperpixel == 1) {
602 513805 inter_pred_8bpp(td);
603 } else {
604 15787 inter_pred_16bpp(td);
605 }
606 }
607
/* Blocks with the skip flag set get no itxfm add at all. */
608
2/2
✓ Branch 0 taken 137847 times.
✓ Branch 1 taken 392434 times.
530281 if (!b->skip) {
609 /* mostly copied intra_recon() */
610
/*
 * w4/h4 are the block dimensions from ff_vp9_bwh_tab[1][b->bs], doubled.
 * One unit of x or y corresponds to 4 pixels (ptr and dst advance by
 * 4 * step1d pixels or rows per step). step1d is the transform size in
 * those units and step is the amount n grows per transform.
 * end_x/end_y cap the loops at 2 * (s->cols - col) and 2 * (s->rows - row);
 * presumably this keeps transforms outside the frame from being processed.
 * tx/uvtx index s->dsp.itxfm_add and are offset by 4 when s->s.h.lossless
 * is set.
 */
611 137847 int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
612 137847 int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
613 137847 int end_x = FFMIN(2 * (s->cols - col), w4);
614 137847 int end_y = FFMIN(2 * (s->rows - row), h4);
615 137847 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
616 137847 int uvstep1d = 1 << b->uvtx, p;
617 137847 uint8_t *dst = td->dst[0];
618
619 // y itxfm add
620
2/2
✓ Branch 0 taken 227388 times.
✓ Branch 1 taken 137847 times.
365235 for (n = 0, y = 0; y < end_y; y += step1d) {
621 227388 uint8_t *ptr = dst;
622
2/2
✓ Branch 0 taken 422951 times.
✓ Branch 1 taken 227388 times.
650339 for (x = 0; x < end_x; x += step1d,
623 422951 ptr += 4 * step1d * bytesperpixel, n += step) {
/*
 * For transforms larger than TX_8X8 the eob is read as a 16-bit value
 * through AV_RN16A; otherwise it is the single td->eob[n] element.
 */
624
2/2
✓ Branch 0 taken 17838 times.
✓ Branch 1 taken 405113 times.
422951 int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
625
/* Transforms with a zero eob are skipped. */
626
2/2
✓ Branch 0 taken 240652 times.
✓ Branch 1 taken 182299 times.
422951 if (eob)
627 240652 s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
628 240652 td->block + 16 * n * bytesperpixel, eob);
629 }
630 227388 dst += 4 * td->y_stride * step1d;
631 }
632
/*
 * The chroma pass reuses end_x/end_y after shifting them by s->ss_h and
 * s->ss_v, and recomputes step from b->uvtx. Both chroma planes
 * (td->dst[1] and td->dst[2]) are handled by the same loop body.
 */
633 // uv itxfm add
634 137847 end_x >>= s->ss_h;
635 137847 end_y >>= s->ss_v;
636 137847 step = 1 << (b->uvtx * 2);
637
2/2
✓ Branch 0 taken 275694 times.
✓ Branch 1 taken 137847 times.
413541 for (p = 0; p < 2; p++) {
638 275694 dst = td->dst[p + 1];
639
2/2
✓ Branch 0 taken 300834 times.
✓ Branch 1 taken 275694 times.
576528 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
640 300834 uint8_t *ptr = dst;
641
2/2
✓ Branch 0 taken 342698 times.
✓ Branch 1 taken 300834 times.
643532 for (x = 0; x < end_x; x += uvstep1d,
642 342698 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
/* Same eob width rule as for luma, keyed on b->uvtx instead of b->tx. */
643
2/2
✓ Branch 0 taken 7772 times.
✓ Branch 1 taken 334926 times.
342698 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
644
645
2/2
✓ Branch 0 taken 66756 times.
✓ Branch 1 taken 275942 times.
342698 if (eob)
646 66756 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
647 66756 td->uvblock[p] + 16 * n * bytesperpixel, eob);
648 }
649 300834 dst += 4 * uvstep1d * td->uv_stride;
650 }
651 }
652 }
653 }
654
/**
 * Reconstruct the current inter block of a tile with one byte per pixel.
 *
 * Calls inter_recon() with bytesperpixel set to 1.
 *
 * @param td tile data of the block to reconstruct
 */
655 514494 void ff_vp9_inter_recon_8bpp(VP9TileData *td)
656 {
657 514494 inter_recon(td, 1);
658 514494 }
659
/**
 * Reconstruct the current inter block of a tile with two bytes per pixel.
 *
 * Calls inter_recon() with bytesperpixel set to 2.
 *
 * @param td tile data of the block to reconstruct
 */
660 15787 void ff_vp9_inter_recon_16bpp(VP9TileData *td)
661 {
662 15787 inter_recon(td, 2);
663 15787 }
664