GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/vc1_loopfilter.c Lines: 720 752 95.7 %
Date: 2020-09-25 23:16:12 Branches: 691 792 87.2 %

Line Branch Exec Source
1
/*
2
 * VC-1 and WMV3 decoder
3
 * Copyright (c) 2011 Mashiat Sarker Shakkhar
4
 * Copyright (c) 2006-2007 Konstantin Shishkov
5
 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23
24
/**
25
 * @file
26
 * VC-1 and WMV3 loopfilter
27
 */
28
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31
#include "vc1.h"
32
#include "vc1dsp.h"
33
34
1344
/**
 * Run the horizontal overlap DSP routine for one block of a macroblock.
 *
 * The function only selects the arguments handed to
 * v->vc1dsp.vc1_h_s_overlap(); the filtering itself happens there.
 *
 * @param v             decoder context providing the DSP function table
 * @param left_block    coefficient blocks of the MB on the left
 * @param right_block   coefficient blocks of the current MB
 * @param left_fieldtx  non-zero if the left MB has its fieldtx flag set
 * @param right_fieldtx non-zero if the current MB has its fieldtx flag set
 * @param block_num     block index 0..5; any other value is a no-op
 */
static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
                                                  int16_t (*right_block)[64], int left_fieldtx,
                                                  int right_fieldtx, int block_num)
{
    int16_t *lhs, *rhs;
    int lhs_step = 8, rhs_step = 8;   /* third and fourth DSP arguments */
    int mode     = 1;                 /* fifth DSP argument */

    switch (block_num) {
    case 0:
    case 2:
        /* Edge between the left MB and the current MB. The two step
         * arguments only deviate from 8 when exactly one side is fieldtx. */
        if (left_fieldtx ^ right_fieldtx) {
            lhs_step = 16 - 8 * left_fieldtx;
            rhs_step = 16 - 8 * right_fieldtx;
        }
        if (block_num == 0) {
            lhs = left_block[2];
            rhs = right_block[0];
            if (left_fieldtx || right_fieldtx)
                mode = 0;
        } else {
            if (!left_fieldtx && right_fieldtx)
                lhs = left_block[2] + 8;
            else
                lhs = left_block[3];

            if (left_fieldtx && !right_fieldtx)
                rhs = right_block[0] + 8;
            else
                rhs = right_block[1];

            if (left_fieldtx || right_fieldtx)
                mode = 2;
        }
        break;

    case 1:
        /* Edge inside the current MB: only its own fieldtx flag matters. */
        lhs = right_block[0];
        rhs = right_block[2];
        if (right_fieldtx)
            mode = 0;
        break;

    case 3:
        lhs = right_block[1];
        rhs = right_block[3];
        if (right_fieldtx)
            mode = 2;
        break;

    case 4:
    case 5:
        /* Blocks 4 and 5 pair up same-numbered blocks of both MBs. */
        lhs = left_block[block_num];
        rhs = right_block[block_num];
        break;

    default:
        return;
    }

    v->vc1dsp.vc1_h_s_overlap(lhs, rhs, lhs_step, rhs_step, mode);
}
77
78
1228
/**
 * Run the vertical overlap DSP routine for one block of a macroblock.
 *
 * @param v            decoder context providing the DSP function table
 * @param top_block    coefficient blocks of the MB above
 * @param bottom_block coefficient blocks of the MB being filtered
 * @param block_num    block index 0..5; any other value is a no-op
 */
static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
                                                  int16_t (*bottom_block)[64], int block_num)
{
    int16_t *upper, *lower;

    switch (block_num) {
    case 0:
        upper = top_block[1];
        lower = bottom_block[0];
        break;
    case 1:
        upper = top_block[3];
        lower = bottom_block[2];
        break;
    case 2:
        /* Blocks 2 and 3 filter an edge inside bottom_block itself. */
        upper = bottom_block[0];
        lower = bottom_block[1];
        break;
    case 3:
        upper = bottom_block[2];
        lower = bottom_block[3];
        break;
    case 4:
    case 5:
        /* Blocks 4 and 5 pair up same-numbered blocks of both MBs. */
        upper = top_block[block_num];
        lower = bottom_block[block_num];
        break;
    default:
        return;
    }

    v->vc1dsp.vc1_v_s_overlap(upper, lower);
}
104
105
399
/**
 * Apply overlap filtering around the current macroblock position, with the
 * decision taken from pq, the profile, condover and the per-MB over_flags
 * plane.
 *
 * @param v decoder context; the MB position is taken from v->s
 */
void ff_vc1_i_overlap_filter(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    int16_t (*topleft_blk)[64] = v->block[v->topleft_blk_idx];
    int16_t (*top_blk)[64]     = v->block[v->top_blk_idx];
    int16_t (*left_blk)[64]    = v->block[v->left_blk_idx];
    int16_t (*cur_blk)[64]     = v->block[v->cur_blk_idx];
    const int nb_blocks = (CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY)) ? 4 : 6;
    const int mb_pos    = s->mb_x + s->mb_y * s->mb_stride;
    const int advanced  = v->profile == PROFILE_ADVANCED;
    /* Overlap that does not depend on the per-MB over_flags plane. */
    const int always    = v->pq >= 9 || (advanced && v->condover == CONDOVER_ALL);
    int blk;

    /* Inside a MB the horizontal overlap has to run before the vertical one.
     * So this pass does H on the left and the internal vertical borders of
     * the MB that was just decoded; H on its right border is left to the
     * next iteration. Only after that can V run on the top and internal
     * horizontal borders. As a result H lags one MB column and V lags one MB
     * row, which is also why the put_pixels loop is delayed by one row and
     * one column. */
    for (blk = 0; blk < nb_blocks; blk++) {
        /* In the first column only blocks with (blk & 5) == 1 are handled. */
        if (s->mb_x == 0 && (blk & 5) != 1)
            continue;

        if (!always &&
            !(advanced &&
              v->over_flags_plane[mb_pos] &&
              ((blk & 5) == 1 || v->over_flags_plane[mb_pos - 1])))
            continue;

        vc1_h_overlap_filter(v,
                             s->mb_x ? left_blk : cur_blk,
                             cur_blk,
                             v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
                             v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
                             blk);
    }

    /* No vertical overlap is done here for interlaced frame pictures. */
    if (v->fcm == ILACE_FRAME)
        return;

    for (blk = 0; blk < nb_blocks; blk++) {
        /* On the first slice line only blocks with bit 1 set are handled. */
        if (s->first_slice_line && !(blk & 2))
            continue;

        /* MB to the left of the current one. */
        if (s->mb_x &&
            (always ||
             (advanced &&
              v->over_flags_plane[mb_pos - 1] &&
              ((blk & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride])))) {
            vc1_v_overlap_filter(v,
                                 s->first_slice_line ? left_blk : topleft_blk,
                                 left_blk,
                                 blk);
        }

        /* In the last column the current MB is handled as well. */
        if (s->mb_x == s->mb_width - 1 &&
            (always ||
             (advanced &&
              v->over_flags_plane[mb_pos] &&
              ((blk & 2) || v->over_flags_plane[mb_pos - s->mb_stride])))) {
            vc1_v_overlap_filter(v,
                                 s->first_slice_line ? cur_blk : top_blk,
                                 cur_blk,
                                 blk);
        }
    }
}
160
161
/**
 * Apply overlap filtering around the current macroblock position, with the
 * decision for each edge taken from the v->mb_type[0] entries of the two
 * blocks that share it.
 *
 * @param v decoder context; the MB position is taken from v->s
 */
void ff_vc1_p_overlap_filter(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    int16_t (*topleft_blk)[64] = v->block[v->topleft_blk_idx];
    int16_t (*top_blk)[64]     = v->block[v->top_blk_idx];
    int16_t (*left_blk)[64]    = v->block[v->left_blk_idx];
    int16_t (*cur_blk)[64]     = v->block[v->cur_blk_idx];
    const int nb_blocks = (CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY)) ? 4 : 6;
    const int mb_pos    = s->mb_x + s->mb_y * s->mb_stride;
    int blk;

    /* Horizontal overlap on the left and internal vertical borders. */
    for (blk = 0; blk < nb_blocks; blk++) {
        const int cur_idx = s->block_index[blk];

        /* In the first column only blocks with (blk & 5) == 1 are handled. */
        if (s->mb_x == 0 && (blk & 5) != 1)
            continue;

        /* Both this block and the one before it must have mb_type set. */
        if (!v->mb_type[0][cur_idx] || !v->mb_type[0][cur_idx - 1])
            continue;

        vc1_h_overlap_filter(v,
                             s->mb_x ? left_blk : cur_blk,
                             cur_blk,
                             v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
                             v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
                             blk);
    }

    /* No vertical overlap is done here for interlaced frame pictures. */
    if (v->fcm == ILACE_FRAME)
        return;

    for (blk = 0; blk < nb_blocks; blk++) {
        const int cur_idx  = s->block_index[blk];
        /* Same block in the MB to the left: two entries back for blocks 0..3,
         * one entry back for blocks 4 and 5. */
        const int left_idx = cur_idx - 2 + (blk > 3);
        const int wrap     = s->block_wrap[blk];

        /* On the first slice line only blocks with bit 1 set are handled. */
        if (s->first_slice_line && !(blk & 2))
            continue;

        if (s->mb_x &&
            v->mb_type[0][left_idx] &&
            v->mb_type[0][left_idx - wrap]) {
            vc1_v_overlap_filter(v,
                                 s->first_slice_line ? left_blk : topleft_blk,
                                 left_blk,
                                 blk);
        }

        /* In the last column the current MB is handled as well. */
        if (s->mb_x == s->mb_width - 1 &&
            v->mb_type[0][cur_idx] &&
            v->mb_type[0][cur_idx - wrap]) {
            vc1_v_overlap_filter(v,
                                 s->first_slice_line ? cur_blk : top_blk,
                                 cur_blk,
                                 blk);
        }
    }
}
200
201
/* Bit flags carried in the `flags` argument of the loop filter helpers.
 * Callers OR them together to describe where the macroblock being filtered
 * sits; the helpers test individual bits to decide whether an edge is
 * processed (LEFT_EDGE in vc1_i_h_loop_filter, TOP_EDGE in
 * vc1_i_v_loop_filter, RIGHT_EDGE in vc1_p_h_loop_filter and BOTTOM_EDGE in
 * vc1_p_v_loop_filter). */
#define LEFT_EDGE   (1 << 0)
202
#define RIGHT_EDGE  (1 << 1)
203
#define TOP_EDGE    (1 << 2)
204
#define BOTTOM_EDGE (1 << 3)
205
206
182304
/**
 * Horizontal loop filter step for one block; filter strength is v->pq.
 *
 * Blocks with bit 1 set are skipped. When LEFT_EDGE is set, only blocks with
 * (block_num & 5) == 1 are filtered.
 *
 * @param v         decoder context
 * @param dest      top-left sample of the plane region for this MB
 * @param flags     edge flags; only LEFT_EDGE is examined
 * @param block_num block index 0..5; blocks above 3 use s->uvlinesize
 */
static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
                                                 uint32_t flags, int block_num)
{
    MpegEncContext *s  = &v->s;
    const int pq       = v->pq;
    const int uv_block = block_num > 3;
    uint8_t *dst       = dest;

    if (block_num & 2)
        return;

    if ((flags & LEFT_EDGE) && (block_num & 5) != 1)
        return;

    /* Bit 1 of block_num is clear here, so blocks 0..3 only ever need the
     * horizontal offset of 8 for odd block numbers. */
    if (!uv_block)
        dst += (block_num & 1) * 8;

    if (v->fcm == ILACE_FRAME) {
        /* Two calls with doubled stride, the second starting one line down. */
        if (uv_block) {
            v->vc1dsp.vc1_h_loop_filter4(dst,                 2 * s->uvlinesize, pq);
            v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
        } else {
            v->vc1dsp.vc1_h_loop_filter8(dst,               2 * s->linesize, pq);
            v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
        }
    } else if (uv_block) {
        v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
    } else {
        v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
    }
}
237
238
182304
/**
 * Vertical loop filter step for one block; filter strength is v->pq.
 *
 * Blocks with (block_num & 5) == 1 are skipped. When TOP_EDGE is set, only
 * blocks with bit 1 set are filtered.
 *
 * @param v         decoder context
 * @param dest      top-left sample of the plane region for this MB
 * @param flags     edge flags; only TOP_EDGE is examined
 * @param fieldtx   fieldtx flag of the MB; in interlaced frame pictures it
 *                  suppresses filtering for blocks 2 and 3
 * @param block_num block index 0..5; blocks above 3 use s->uvlinesize
 */
static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
                                                 uint32_t flags, uint8_t fieldtx,
                                                 int block_num)
{
    MpegEncContext *s  = &v->s;
    const int pq       = v->pq;
    const int uv_block = block_num > 3;
    uint8_t *dst;

    if ((block_num & 5) == 1)
        return;

    if ((flags & TOP_EDGE) && !(block_num & 2))
        return;

    if (uv_block)
        dst = dest;
    else
        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;

    if (v->fcm != ILACE_FRAME) {
        if (uv_block)
            v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
        else
            v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
        return;
    }

    /* Interlaced frame: two calls with doubled stride, the second starting
     * one line down. */
    if (uv_block) {
        v->vc1dsp.vc1_v_loop_filter8(dst,                 2 * s->uvlinesize, pq);
        v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
    } else if (block_num < 2 || !fieldtx) {
        v->vc1dsp.vc1_v_loop_filter16(dst,               2 * s->linesize, pq);
        v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
    }
}
270
271
30384
/**
 * Run the loop filter passes that become possible once the MB at
 * (s->mb_x, s->mb_y) has been processed, using the pq-only helpers
 * vc1_i_v_loop_filter() and vc1_i_h_loop_filter().
 *
 * @param v decoder context; position and plane pointers come from v->s
 */
void ff_vc1_i_loop_filter(VC1Context *v)
{
    MpegEncContext *s   = &v->s;
    const int nb_blocks = (CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY)) ? 4 : 6;
    const int mb_pos    = s->mb_x + s->mb_y * s->mb_stride;
    const int last_col  = s->mb_x == v->end_mb_x - 1;
    const int last_row  = s->mb_y == s->end_mb_y - 1;
    uint8_t *luma;
    uint8_t fieldtx;
    uint32_t edges = 0;
    int blk;

    /* Inside a MB the vertical loop filter has to run before the horizontal
     * one. So this pass does V on the top and internal horizontal borders of
     * the MB that was overlap filtered last; V on its bottom border is left
     * to the loop filter iteration of the next row. Only after that can H
     * run on the left and internal vertical borders. The loop filter
     * therefore lags the overlap filter by one row and one column, and the
     * decoding loop by two rows and two columns. */

    /* V filter, MB row above the current one. */
    if (!s->first_slice_line) {
        luma  = s->dest[0] - 16 * s->linesize - 16;
        edges = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;

        if (s->mb_x) {
            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_v_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 8 * s->uvlinesize - 8 : luma,
                                    edges, fieldtx, blk);
            }
        }
        if (last_col) {
            luma   += 16;
            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_v_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 8 * s->uvlinesize : luma,
                                    edges, fieldtx, blk);
            }
        }
    }

    /* V filter, current MB row; only done on the last row. */
    if (last_row) {
        luma  = s->dest[0] - 16;
        edges = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;

        if (s->mb_x) {
            fieldtx = v->fieldtx_plane[mb_pos - 1];
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_v_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 8 : luma,
                                    edges, fieldtx, blk);
            }
        }
        if (last_col) {
            luma   += 16;
            fieldtx = v->fieldtx_plane[mb_pos];
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_v_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] : luma,
                                    edges, fieldtx, blk);
            }
        }
    }

    /* H filter, two MB rows above the current one. */
    if (s->mb_y >= s->start_mb_y + 2) {
        luma = s->dest[0] - 32 * s->linesize - 16;

        if (s->mb_x) {
            edges = s->mb_x == 1 ? LEFT_EDGE : 0;
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_h_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 16 * s->uvlinesize - 8 : luma,
                                    edges, blk);
            }
        }
        if (last_col) {
            luma += 16;
            edges = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_h_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 16 * s->uvlinesize : luma,
                                    edges, blk);
            }
        }
    }

    /* On the last row, also run the H filter on the row above and on the
     * current row. */
    if (last_row) {
        if (s->mb_y >= s->start_mb_y + 1) {
            luma = s->dest[0] - 16 * s->linesize - 16;

            if (s->mb_x) {
                edges = s->mb_x == 1 ? LEFT_EDGE : 0;
                for (blk = 0; blk < nb_blocks; blk++) {
                    vc1_i_h_loop_filter(v,
                                        blk > 3 ? s->dest[blk - 3] - 8 * s->uvlinesize - 8 : luma,
                                        edges, blk);
                }
            }
            if (last_col) {
                edges = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
                luma += 16;
                for (blk = 0; blk < nb_blocks; blk++) {
                    vc1_i_h_loop_filter(v,
                                        blk > 3 ? s->dest[blk - 3] - 8 * s->uvlinesize : luma,
                                        edges, blk);
                }
            }
        }

        luma = s->dest[0] - 16;

        if (s->mb_x) {
            edges = s->mb_x == 1 ? LEFT_EDGE : 0;
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_h_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] - 8 : luma,
                                    edges, blk);
            }
        }
        if (last_col) {
            luma += 16;
            edges = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                vc1_i_h_loop_filter(v,
                                    blk > 3 ? s->dest[blk - 3] : luma,
                                    edges, blk);
            }
        }
    }
}
362
363
783294
/**
 * Horizontal loop filter step for one block, driven by coded block
 * patterns, intra flags, motion vectors and transform types.
 *
 * Two edges are considered: the one 8 samples to the right of the block
 * origin (shared with the block to the right) and the one 4 samples in,
 * which is only filtered for TT_4X4 / TT_4X8 transforms.
 *
 * @param v         decoder context
 * @param dest      top-left sample of the plane region for this MB
 * @param cbp       cbp[0] holds one 4-bit group per block of this MB,
 *                  cbp[1] that of the MB to the right
 * @param is_intra  is_intra[0] holds one bit per block of this MB,
 *                  is_intra[1] that of the MB to the right
 * @param mv        mv[0] is compared against mv[1]
 * @param mv_f      mv_f[0] is compared against mv_f[1], field pictures only
 * @param ttblk     ttblk[0] holds one 4-bit transform type per block
 * @param flags     edge flags; only RIGHT_EDGE is examined
 * @param block_num block index 0..5; blocks above 3 use s->uvlinesize
 */
static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
                                                 int *ttblk, uint32_t flags, int block_num)
{
    MpegEncContext *s       = &v->s;
    const int pq            = v->pq;
    const int uv_block      = block_num > 3;
    const uint32_t left_cbp = cbp[0] >> (block_num * 4);
    int linesize            = uv_block ? s->uvlinesize : s->linesize;
    uint8_t *dst            = uv_block ? dest
                                       : dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
    int tt;

    /* Edge shared with the right-hand neighbour. With RIGHT_EDGE set, only
     * blocks with (block_num & 5) == 0 still have one. */
    if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
        uint8_t left_is_intra = is_intra[0] & (1 << block_num);
        uint8_t right_is_intra;
        uint32_t right_cbp;

        if (uv_block) {
            /* Same block number in the next MB. */
            right_is_intra = is_intra[1] & (1 << block_num);
            right_cbp      = cbp[1] >> (block_num * 4);
        } else if (block_num & 1) {
            /* Odd block: the neighbour is block_num - 1 of the next MB. */
            right_is_intra = is_intra[1] & (1 << (block_num - 1));
            right_cbp      = cbp[1] >> ((block_num - 1) * 4);
        } else {
            /* Even block: the neighbour is block_num + 1 of the same MB. */
            right_is_intra = is_intra[0] & (1 << (block_num + 1));
            right_cbp      = cbp[0] >> ((block_num + 1) * 4);
        }

        if (left_is_intra || right_is_intra ||
            mv[0][0] != mv[1][0] ||
            mv[0][1] != mv[1][1] ||
            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1])) {
            /* Either side intra, or differing motion: filter all 8 lines. */
            v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
        } else {
            /* Otherwise each 4-line half is selected by the combined cbp. */
            int idx = (left_cbp | (right_cbp >> 1)) & 5;

            if (idx & 1)
                v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
            if (idx & 4)
                v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
        }
    }

    /* Edge 4 samples into the block, present for 4x4 and 4x8 transforms. */
    tt = (ttblk[0] >> (block_num * 4)) & 0xf;
    if (tt == TT_4X4 || tt == TT_4X8) {
        if (left_cbp & 3)
            v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
        if (left_cbp & 12)
            v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
    }
}
415
416
783294
/**
 * Vertical loop filter step for one block, driven by coded block patterns,
 * intra flags, motion vectors and transform types.
 *
 * Two edges are considered: the one 8 lines below the block origin (shared
 * with the block below) and the one 4 lines down, which is only filtered
 * for TT_4X4 / TT_8X4 transforms.
 *
 * @param v         decoder context
 * @param dest      top-left sample of the plane region for this MB
 * @param cbp       cbp[0] holds one 4-bit group per block of this MB,
 *                  cbp[s->mb_stride] that of the MB below
 * @param is_intra  is_intra[0] holds one bit per block of this MB,
 *                  is_intra[s->mb_stride] that of the MB below
 * @param mv        mv[0] is compared against the entry s->mb_stride
 *                  (blocks above 3) or s->b8_stride (otherwise) further on
 * @param mv_f      compared at the same offsets as mv, field pictures only
 * @param ttblk     ttblk[0] holds one 4-bit transform type per block
 * @param flags     edge flags; only BOTTOM_EDGE is examined
 * @param block_num block index 0..5; blocks above 3 use s->uvlinesize
 */
static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
                                                 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
                                                 int *ttblk, uint32_t flags, int block_num)
{
    MpegEncContext *s      = &v->s;
    const int pq           = v->pq;
    const int uv_block     = block_num > 3;
    const uint32_t top_cbp = cbp[0] >> (block_num * 4);
    int linesize           = uv_block ? s->uvlinesize : s->linesize;
    uint8_t *dst           = uv_block ? dest
                                      : dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
    int tt;

    /* Edge shared with the block below. With BOTTOM_EDGE set, only blocks
     * 0 and 1 still have one. */
    if (!(flags & BOTTOM_EDGE) || block_num < 2) {
        /* Offset from this block's mv / mv_f entry to the one below it. */
        const int below      = uv_block ? s->mb_stride : s->b8_stride;
        uint8_t top_is_intra = is_intra[0] & (1 << block_num);
        uint8_t bottom_is_intra;
        uint32_t bottom_cbp;

        if (uv_block) {
            /* Same block number in the MB below. */
            bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
            bottom_cbp      = cbp[s->mb_stride] >> (block_num * 4);
        } else if (block_num < 2) {
            /* Blocks 0/1: the neighbour is block_num + 2 of the same MB. */
            bottom_is_intra = is_intra[0] & (1 << (block_num + 2));
            bottom_cbp      = cbp[0] >> ((block_num + 2) * 4);
        } else {
            /* Blocks 2/3: the neighbour is block_num - 2 of the MB below. */
            bottom_is_intra = is_intra[s->mb_stride] & (1 << (block_num - 2));
            bottom_cbp      = cbp[s->mb_stride] >> ((block_num - 2) * 4);
        }

        if (top_is_intra || bottom_is_intra ||
            mv[0][0] != mv[below][0] ||
            mv[0][1] != mv[below][1] ||
            (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[below])) {
            /* Either side intra, or differing motion: filter all 8 columns. */
            v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
        } else {
            /* Otherwise each 4-column half is selected by the combined cbp. */
            int idx = (top_cbp | (bottom_cbp >> 2)) & 3;

            if (idx & 1)
                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
            if (idx & 2)
                v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
        }
    }

    /* Edge 4 lines into the block, present for 4x4 and 8x4 transforms. */
    tt = (ttblk[0] >> (block_num * 4)) & 0xf;
    if (tt == TT_4X4 || tt == TT_8X4) {
        if (top_cbp & 5)
            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
        if (top_cbp & 10)
            v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
    }
}
469
470
130549
void ff_vc1_p_loop_filter(VC1Context *v)
471
{
472
130549
    MpegEncContext *s = &v->s;
473
130549
    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
474
    uint8_t *dest;
475
    uint32_t *cbp;
476
    uint8_t *is_intra;
477
    int16_t (*uvmv)[2];
478
    int *ttblk;
479
    uint32_t flags;
480
    int i;
481
482
    /* Within a MB, the vertical loop filter always runs before the horizontal.
483
     * To accomplish that, we run the V loop filter on all applicable
484
     * horizontal borders of the MB above the last overlap filtered MB. Then,
485
     * we wait for the next loop filter iteration to do H loop filter on all
486
     * applicable vertical borders of this MB. Therefore, the loop filter
487
     * trails by one row and one column relative to the overlap filter and two
488
     * rows and two columns relative to the decoding loop. */
489
130549
    if (s->mb_y >= s->start_mb_y + 2) {
490
82638
        if (s->mb_x) {
491
80141
            dest = s->dest[0] - 32 * s->linesize - 16;
492
80141
            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
493
80141
            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
494
80141
            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
495
80141
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
496
80141
            flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
497
560987
            for (i = 0; i < block_count; i++)
498

1442538
                vc1_p_v_loop_filter(v,
499
160282
                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
500
                                    cbp,
501
                                    is_intra,
502
                                    i > 3 ? uvmv :
503
320564
                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
504
160282
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
505
320564
                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
506
                                    ttblk,
507
                                    flags,
508
                                    i);
509
        }
510
82638
        if (s->mb_x == s->mb_width - 1) {
511
2497
            dest = s->dest[0] - 32 * s->linesize;
512
2497
            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
513
2497
            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
514
2497
            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
515
2497
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
516
2497
            flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
517
17479
            for (i = 0; i < block_count; i++)
518

44946
                vc1_p_v_loop_filter(v,
519
4994
                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
520
                                    cbp,
521
                                    is_intra,
522
                                    i > 3 ? uvmv :
523
9988
                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
524
4994
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
525
9988
                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
526
                                    ttblk,
527
                                    flags,
528
                                    i);
529
        }
530
    }
531
130549
    if (s->mb_y == s->end_mb_y - 1) {
532
33491
        if (s->mb_x) {
533
32628
            if (s->mb_y >= s->start_mb_y + 1) {
534
13988
                dest = s->dest[0] - 16 * s->linesize - 16;
535
13988
                cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
536
13988
                is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
537
13988
                uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
538
13988
                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
539
13988
                flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
540
97916
                for (i = 0; i < block_count; i++)
541

251784
                    vc1_p_v_loop_filter(v,
542
27976
                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
543
                                        cbp,
544
                                        is_intra,
545
                                        i > 3 ? uvmv :
546
55952
                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
547
27976
                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
548
55952
                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
549
                                        ttblk,
550
                                        flags,
551
                                        i);
552
            }
553
32628
            dest = s->dest[0] - 16;
554
32628
            cbp = &v->cbp[s->mb_x - 1];
555
32628
            is_intra = &v->is_intra[s->mb_x - 1];
556
32628
            uvmv = &v->luma_mv[s->mb_x - 1];
557
32628
            ttblk = &v->ttblk[s->mb_x - 1];
558
32628
            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
559
228396
            for (i = 0; i < block_count; i++)
560

587304
                vc1_p_v_loop_filter(v,
561
65256
                                    i > 3 ? s->dest[i - 3] - 8 : dest,
562
                                    cbp,
563
                                    is_intra,
564
                                    i > 3 ? uvmv :
565
130512
                                            &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
566
65256
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
567
130512
                                            &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
568
                                    ttblk,
569
                                    flags,
570
                                    i);
571
        }
572
33491
        if (s->mb_x == s->mb_width - 1) {
573
863
            if (s->mb_y >= s->start_mb_y + 1) {
574
432
                dest = s->dest[0] - 16 * s->linesize;
575
432
                cbp = &v->cbp[s->mb_x - s->mb_stride];
576
432
                is_intra = &v->is_intra[s->mb_x - s->mb_stride];
577
432
                uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
578
432
                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
579
432
                flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
580
3024
                for (i = 0; i < block_count; i++)
581

7776
                    vc1_p_v_loop_filter(v,
582
864
                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
583
                                        cbp,
584
                                        is_intra,
585
                                        i > 3 ? uvmv :
586
1728
                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
587
864
                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
588
1728
                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
589
                                        ttblk,
590
                                        flags,
591
                                        i);
592
            }
593
863
            dest = s->dest[0];
594
863
            cbp = &v->cbp[s->mb_x];
595
863
            is_intra = &v->is_intra[s->mb_x];
596
863
            uvmv = &v->luma_mv[s->mb_x];
597
863
            ttblk = &v->ttblk[s->mb_x];
598
863
            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
599
6041
            for (i = 0; i < block_count; i++)
600

15534
                vc1_p_v_loop_filter(v,
601
1726
                                    i > 3 ? s->dest[i - 3] : dest,
602
                                    cbp,
603
                                    is_intra,
604
                                    i > 3 ? uvmv :
605
3452
                                            &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
606
1726
                                    i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
607
3452
                                            &v->mv_f[0][s->block_index[i] + v->blocks_off],
608
                                    ttblk,
609
                                    flags,
610
                                    i);
611
        }
612
    }
613
614
130549
    if (s->mb_y >= s->start_mb_y + 2) {
615
82638
        if (s->mb_x >= 2) {
616
77644
            dest = s->dest[0] - 32 * s->linesize - 32;
617
77644
            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
618
77644
            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
619
77644
            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
620
77644
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
621
77644
            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
622
543508
            for (i = 0; i < block_count; i++)
623

1397592
                vc1_p_h_loop_filter(v,
624
155288
                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
625
                                    cbp,
626
                                    is_intra,
627
                                    i > 3 ? uvmv :
628
310576
                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
629
155288
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
630
310576
                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
631
                                    ttblk,
632
                                    flags,
633
                                    i);
634
        }
635
82638
        if (s->mb_x == s->mb_width - 1) {
636
2497
            if (s->mb_x >= 1) {
637
2497
                dest = s->dest[0] - 32 * s->linesize - 16;
638
2497
                cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
639
2497
                is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
640
2497
                uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
641
2497
                ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
642
2497
                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
643
17479
                for (i = 0; i < block_count; i++)
644

44946
                        vc1_p_h_loop_filter(v,
645
4994
                                            i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
646
                                            cbp,
647
                                            is_intra,
648
                                            i > 3 ? uvmv :
649
9988
                                                    &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
650
4994
                                            i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
651
9988
                                                    &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
652
                                            ttblk,
653
                                            flags,
654
                                            i);
655
            }
656
2497
            dest = s->dest[0] - 32 * s->linesize;
657
2497
            cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
658
2497
            is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
659
2497
            uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
660
2497
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
661
2497
            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
662
17479
            for (i = 0; i < block_count; i++)
663

44946
                vc1_p_h_loop_filter(v,
664
4994
                                    i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
665
                                    cbp,
666
                                    is_intra,
667
                                    i > 3 ? uvmv :
668
9988
                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
669
4994
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
670
9988
                                            &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
671
                                    ttblk,
672
                                    flags,
673
                                    i);
674
        }
675
    }
676
130549
    if (s->mb_y == s->end_mb_y - 1) {
677
33491
        if (s->mb_y >= s->start_mb_y + 1) {
678
14420
            if (s->mb_x >= 2) {
679
13556
                dest = s->dest[0] - 16 * s->linesize - 32;
680
13556
                cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
681
13556
                is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
682
13556
                uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
683
13556
                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
684
13556
                flags = s->mb_x == 2 ? LEFT_EDGE : 0;
685
94892
                for (i = 0; i < block_count; i++)
686

244008
                    vc1_p_h_loop_filter(v,
687
27112
                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
688
                                        cbp,
689
                                        is_intra,
690
                                        i > 3 ? uvmv :
691
54224
                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
692
27112
                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
693
54224
                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
694
                                        ttblk,
695
                                        flags,
696
                                        i);
697
            }
698
14420
            if (s->mb_x == s->mb_width - 1) {
699
432
                if (s->mb_x >= 1) {
700
432
                    dest = s->dest[0] - 16 * s->linesize - 16;
701
432
                    cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
702
432
                    is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
703
432
                    uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
704
432
                    ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
705
432
                    flags = s->mb_x == 1 ? LEFT_EDGE : 0;
706
3024
                    for (i = 0; i < block_count; i++)
707

7776
                            vc1_p_h_loop_filter(v,
708
864
                                                i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
709
                                                cbp,
710
                                                is_intra,
711
                                                i > 3 ? uvmv :
712
1728
                                                        &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
713
864
                                                i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
714
1728
                                                        &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
715
                                                ttblk,
716
                                                flags,
717
                                                i);
718
                }
719
432
                dest = s->dest[0] - 16 * s->linesize;
720
432
                cbp = &v->cbp[s->mb_x - s->mb_stride];
721
432
                is_intra = &v->is_intra[s->mb_x - s->mb_stride];
722
432
                uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
723
432
                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
724
432
                flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
725
3024
                for (i = 0; i < block_count; i++)
726

7776
                    vc1_p_h_loop_filter(v,
727
864
                                        i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
728
                                        cbp,
729
                                        is_intra,
730
                                        i > 3 ? uvmv :
731
1728
                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
732
864
                                        i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
733
1728
                                                &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
734
                                        ttblk,
735
                                        flags,
736
                                        i);
737
            }
738
        }
739
33491
        if (s->mb_x >= 2) {
740
31765
            dest = s->dest[0] - 32;
741
31765
            cbp = &v->cbp[s->mb_x - 2];
742
31765
            is_intra = &v->is_intra[s->mb_x - 2];
743
31765
            uvmv = &v->luma_mv[s->mb_x - 2];
744
31765
            ttblk = &v->ttblk[s->mb_x - 2];
745
31765
            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
746
222355
            for (i = 0; i < block_count; i++)
747

571770
                vc1_p_h_loop_filter(v,
748
63530
                                    i > 3 ? s->dest[i - 3] - 16 : dest,
749
                                    cbp,
750
                                    is_intra,
751
                                    i > 3 ? uvmv :
752
127060
                                            &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
753
63530
                                    i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
754
127060
                                            &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
755
                                    ttblk,
756
                                    flags,
757
                                    i);
758
        }
759
33491
        if (s->mb_x == s->mb_width - 1) {
760
863
            if (s->mb_x >= 1) {
761
863
                dest = s->dest[0] - 16;
762
863
                cbp = &v->cbp[s->mb_x - 1];
763
863
                is_intra = &v->is_intra[s->mb_x - 1];
764
863
                uvmv = &v->luma_mv[s->mb_x - 1];
765
863
                ttblk = &v->ttblk[s->mb_x - 1];
766
863
                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
767
6041
                for (i = 0; i < block_count; i++)
768

15534
                    vc1_p_h_loop_filter(v,
769
1726
                                        i > 3 ? s->dest[i - 3] - 8 : dest,
770
                                        cbp,
771
                                        is_intra,
772
                                        i > 3 ? uvmv :
773
3452
                                                &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
774
1726
                                        i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
775
3452
                                                &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
776
                                        ttblk,
777
                                        flags,
778
                                        i);
779
            }
780
863
            dest = s->dest[0];
781
863
            cbp = &v->cbp[s->mb_x];
782
863
            is_intra = &v->is_intra[s->mb_x];
783
863
            uvmv = &v->luma_mv[s->mb_x];
784
863
            ttblk = &v->ttblk[s->mb_x];
785
863
            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
786
6041
            for (i = 0; i < block_count; i++)
787

15534
                vc1_p_h_loop_filter(v,
788
1726
                                    i > 3 ? s->dest[i - 3] : dest,
789
                                    cbp,
790
                                    is_intra,
791
                                    i > 3 ? uvmv :
792
3452
                                            &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
793
1726
                                    i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
794
3452
                                            &v->mv_f[0][s->block_index[i] + v->blocks_off],
795
                                    ttblk,
796
                                    flags,
797
                                    i);
798
        }
799
    }
800
130549
}
801
802
293760
/**
 * Horizontal loop filter pass for a single block; called from
 * ff_vc1_p_intfr_loop_filter().
 *
 * @param v         decoder context (pq and the DSP function table are read)
 * @param dest      top-left sample of the macroblock plane holding the block
 * @param ttblk     per-macroblock transform-type word; the 4-bit field at
 *                  bit offset block_num * 4 is examined
 * @param flags     edge flags; only RIGHT_EDGE is examined here
 * @param fieldtx   non-zero selects the every-other-line filter8 path for
 *                  blocks 0..3
 * @param block_num block index; values above 3 use uvlinesize and take dest
 *                  unmodified, values 0..3 address an 8x8 quadrant of dest
 */
static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
                                                       uint32_t flags, uint8_t fieldtx, int block_num)
{
    MpegEncContext *s  = &v->s;
    int pq             = v->pq;
    int linesize       = block_num > 3 ? s->uvlinesize : s->linesize;
    int tt             = (ttblk[0] >> (block_num * 4)) & 0xf;
    int inner_edge     = tt == TT_4X4 || tt == TT_4X8;
    int not_right_edge = !(flags & RIGHT_EDGE);
    int stride         = 2 * linesize;
    uint8_t *dst       = dest;

    /* Blocks 0..3: step to the block's quadrant (8 rows and/or 8 columns). */
    if (block_num <= 3)
        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;

    if (block_num < 4 && fieldtx) {
        /* One 8-line filter call per edge, touching every second line.
         * Blocks 2 and 3 start 7 lines above their quadrant origin. */
        int top_pair   = block_num < 2;
        uint8_t *row   = top_pair ? dst : dst - 7 * linesize;
        int left_block = top_pair ? block_num == 0 : block_num == 2;

        if (inner_edge)
            v->vc1dsp.vc1_h_loop_filter8(row + 4, stride, pq);
        if (not_right_edge || left_block)
            v->vc1dsp.vc1_h_loop_filter8(row + 8, stride, pq);
        return;
    }

    /* Remaining cases: each edge is filtered with two 4-line calls, one
     * starting on dst and one on the following line, both with doubled
     * stride. */
    if (inner_edge) {
        v->vc1dsp.vc1_h_loop_filter4(dst + 4, stride, pq);
        v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, stride, pq);
    }

    /* For blocks 0..3 the right-hand edge is still filtered at RIGHT_EDGE
     * when (block_num & 5) is zero; blocks above 3 never are. */
    if (not_right_edge || (block_num < 4 && !(block_num & 5))) {
        v->vc1dsp.vc1_h_loop_filter4(dst + 8, stride, pq);
        v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, stride, pq);
    }
}
851
852
293760
/**
 * Vertical loop filter pass for a single block; called from
 * ff_vc1_p_intfr_loop_filter().
 *
 * @param v         decoder context (pq and the DSP function table are read)
 * @param dest      top-left sample of the macroblock plane holding the block
 * @param ttblk     per-macroblock transform-type word; the 4-bit field at
 *                  bit offset block_num * 4 is examined
 * @param flags     edge flags; TOP_EDGE and BOTTOM_EDGE are examined
 * @param fieldtx   non-zero selects the single-call-per-edge path for
 *                  blocks 0..3
 * @param block_num block index; values above 3 use uvlinesize and take dest
 *                  unmodified, values 0..3 address an 8x8 quadrant of dest
 */
static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
                                                       uint32_t flags, uint8_t fieldtx, int block_num)
{
    MpegEncContext *s  = &v->s;
    int pq             = v->pq;
    int linesize       = block_num > 3 ? s->uvlinesize : s->linesize;
    int tt             = (ttblk[0] >> (block_num * 4)) & 0xf;
    int inner_edge     = tt == TT_4X4 || tt == TT_8X4;
    int stride         = 2 * linesize;
    uint8_t *dst       = dest;
    int run, filter_inner;

    /* Blocks 0..3: step to the block's quadrant (8 rows and/or 8 columns). */
    if (block_num <= 3)
        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;

    if (block_num < 4 && fieldtx) {
        /* Blocks 0/1 filter at lines 8 and 16, blocks 2/3 at lines 1 and 9
         * (relative to dst); the second edge is skipped at BOTTOM_EDGE. */
        int first_line = block_num < 2 ? 8 : 1;

        if (inner_edge)
            v->vc1dsp.vc1_v_loop_filter8(dst + first_line * linesize, stride, pq);
        if (!(flags & BOTTOM_EDGE))
            v->vc1dsp.vc1_v_loop_filter8(dst + (first_line + 8) * linesize, stride, pq);
        return;
    }

    /* Non-fieldtx blocks 0..3 and all blocks above 3 share one sequence:
     * optionally the edge at lines 4/5, then the edge at lines 8/9.
     *  - blocks 0/1:   always run; lines 4/5 are skipped at TOP_EDGE
     *  - blocks 2/3:   skipped entirely at BOTTOM_EDGE
     *  - blocks 4, 5+: skipped entirely at BOTTOM_EDGE; lines 4/5 are also
     *                  skipped at TOP_EDGE */
    if (block_num < 4 && block_num < 2) {
        run          = 1;
        filter_inner = !(flags & TOP_EDGE) && inner_edge;
    } else if (block_num < 4) {
        run          = !(flags & BOTTOM_EDGE);
        filter_inner = inner_edge;
    } else {
        run          = !(flags & BOTTOM_EDGE);
        filter_inner = !(flags & TOP_EDGE) && inner_edge;
    }

    if (!run)
        return;

    if (filter_inner) {
        v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, stride, pq);
        v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, stride, pq);
    }
    v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, stride, pq);
    v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, stride, pq);
}
908
909
48960
void ff_vc1_p_intfr_loop_filter(VC1Context *v)
910
{
911
48960
    MpegEncContext *s = &v->s;
912
48960
    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
913
48960
    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
914
    uint8_t *dest;
915
    int *ttblk;
916
    uint32_t flags;
917
    uint8_t fieldtx;
918
    int i;
919
920
    /* Within a MB, the vertical loop filter always runs before the horizontal.
921
     * To accomplish that, we run the V loop filter on all applicable
922
     * horizontal borders of the MB above the last overlap filtered MB. Then,
923
     * we wait for the loop filter iteration on the next row and next column to
924
     * do H loop filter on all applicable vertical borders of this MB.
925
     * Therefore, the loop filter trails by two rows and one column relative to
926
     * the overlap filter and two rows and two columns relative to the decoding
927
     * loop. */
928
48960
    if (s->mb_x) {
929
48552
        if (s->mb_y >= s->start_mb_y + 1) {
930
47838
            dest = s->dest[0] - 16 * s->linesize - 16;
931
47838
            ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
932
47838
            flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
933
47838
            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
934
334866
            for (i = 0; i < block_count; i++)
935
382704
                vc1_p_v_intfr_loop_filter(v,
936
95676
                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
937
                                          ttblk,
938
                                          flags,
939
                                          fieldtx,
940
                                          i);
941
        }
942
    }
943
48960
    if (s->mb_x == s->mb_width - 1) {
944
408
        if (s->mb_y >= s->start_mb_y + 1) {
945
402
            dest = s->dest[0] - 16 * s->linesize;
946
402
            ttblk = &v->ttblk[s->mb_x - s->mb_stride];
947
402
            flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
948
402
            fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
949
2814
            for (i = 0; i < block_count; i++)
950
3216
                vc1_p_v_intfr_loop_filter(v,
951
804
                                          i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
952
                                          ttblk,
953
                                          flags,
954
                                          fieldtx,
955
                                          i);
956
        }
957
    }
958
48960
    if (s->mb_y == s->end_mb_y - 1) {
959
720
        if (s->mb_x) {
960
714
            dest = s->dest[0] - 16;
961
714
            ttblk = &v->ttblk[s->mb_x - 1];
962
714
            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
963
714
            fieldtx = v->fieldtx_plane[mb_pos - 1];
964
4998
            for (i = 0; i < block_count; i++)
965
5712
                vc1_p_v_intfr_loop_filter(v,
966
1428
                                          i > 3 ? s->dest[i - 3] - 8 : dest,
967
                                          ttblk,
968
                                          flags,
969
                                          fieldtx,
970
                                          i);
971
        }
972
720
        if (s->mb_x == s->mb_width - 1) {
973
6
            dest = s->dest[0];
974
6
            ttblk = &v->ttblk[s->mb_x];
975
6
            flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
976
6
            fieldtx = v->fieldtx_plane[mb_pos];
977
42
            for (i = 0; i < block_count; i++)
978
48
                vc1_p_v_intfr_loop_filter(v,
979
12
                                          i > 3 ? s->dest[i - 3] : dest,
980
                                          ttblk,
981
                                          flags,
982
                                          fieldtx,
983
                                          i);
984
        }
985
    }
986
987
48960
    if (s->mb_y >= s->start_mb_y + 2) {
988
47520
        if (s->mb_x >= 2) {
989
46728
            dest = s->dest[0] - 32 * s->linesize - 32;
990
46728
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
991
46728
            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
992
46728
            fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 2];
993
327096
            for (i = 0; i < block_count; i++)
994
373824
                vc1_p_h_intfr_loop_filter(v,
995
93456
                                          i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
996
                                          ttblk,
997
                                          flags,
998
                                          fieldtx,
999
                                          i);
1000
        }
1001
47520
        if (s->mb_x == s->mb_width - 1) {
1002
396
            if (s->mb_x >= 1) {
1003
396
                dest = s->dest[0] - 32 * s->linesize - 16;
1004
396
                ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
1005
396
                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1006
396
                fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 1];
1007
2772
                for (i = 0; i < block_count; i++)
1008
3168
                    vc1_p_h_intfr_loop_filter(v,
1009
792
                                              i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
1010
                                              ttblk,
1011
                                              flags,
1012
                                              fieldtx,
1013
                                              i);
1014
            }
1015
396
            dest = s->dest[0] - 32 * s->linesize;
1016
396
            ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
1017
396
            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1018
396
            fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride];
1019
2772
            for (i = 0; i < block_count; i++)
1020
3168
                vc1_p_h_intfr_loop_filter(v,
1021
792
                                          i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
1022
                                          ttblk,
1023
                                          flags,
1024
                                          fieldtx,
1025
                                          i);
1026
        }
1027
    }
1028
48960
    if (s->mb_y == s->end_mb_y - 1) {
1029
720
        if (s->mb_y >= s->start_mb_y + 1) {
1030
720
            if (s->mb_x >= 2) {
1031
708
                dest = s->dest[0] - 16 * s->linesize - 32;
1032
708
                ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
1033
708
                flags = s->mb_x == 2 ? LEFT_EDGE : 0;
1034
708
                fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 2];
1035
4956
                for (i = 0; i < block_count; i++)
1036
5664
                    vc1_p_h_intfr_loop_filter(v,
1037
1416
                                              i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
1038
                                              ttblk,
1039
                                              flags,
1040
                                              fieldtx,
1041
                                              i);
1042
            }
1043
720
            if (s->mb_x == s->mb_width - 1) {
1044
6
                if (s->mb_x >= 1) {
1045
6
                    dest = s->dest[0] - 16 * s->linesize - 16;
1046
6
                    ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
1047
6
                    flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1048
6
                    fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
1049
42
                    for (i = 0; i < block_count; i++)
1050
48
                        vc1_p_h_intfr_loop_filter(v,
1051
12
                                                  i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
1052
                                                  ttblk,
1053
                                                  flags,
1054
                                                  fieldtx,
1055
                                                  i);
1056
                }
1057
6
                dest = s->dest[0] - 16 * s->linesize;
1058
6
                ttblk = &v->ttblk[s->mb_x - s->mb_stride];
1059
6
                flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1060
6
                fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
1061
42
                for (i = 0; i < block_count; i++)
1062
48
                    vc1_p_h_intfr_loop_filter(v,
1063
12
                                              i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
1064
                                              ttblk,
1065
                                              flags,
1066
                                              fieldtx,
1067
                                              i);
1068
            }
1069
        }
1070
720
        if (s->mb_x >= 2) {
1071
708
            dest = s->dest[0] - 32;
1072
708
            ttblk = &v->ttblk[s->mb_x - 2];
1073
708
            flags = s->mb_x == 2 ? LEFT_EDGE : 0;
1074
708
            fieldtx = v->fieldtx_plane[mb_pos - 2];
1075
4956
            for (i = 0; i < block_count; i++)
1076
5664
                vc1_p_h_intfr_loop_filter(v,
1077
1416
                                          i > 3 ? s->dest[i - 3] - 16 : dest,
1078
                                          ttblk,
1079
                                          flags,
1080
                                          fieldtx,
1081
                                          i);
1082
        }
1083
720
        if (s->mb_x == s->mb_width - 1) {
1084
6
            if (s->mb_x >= 1) {
1085
6
                dest = s->dest[0] - 16;
1086
6
                ttblk = &v->ttblk[s->mb_x - 1];
1087
6
                flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1088
6
                fieldtx = v->fieldtx_plane[mb_pos - 1];
1089
42
                for (i = 0; i < block_count; i++)
1090
48
                    vc1_p_h_intfr_loop_filter(v,
1091
12
                                              i > 3 ? s->dest[i - 3] - 8 : dest,
1092
                                              ttblk,
1093
                                              flags,
1094
                                              fieldtx,
1095
                                              i);
1096
            }
1097
6
            dest = s->dest[0];
1098
6
            ttblk = &v->ttblk[s->mb_x];
1099
6
            flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1100
6
            fieldtx = v->fieldtx_plane[mb_pos];
1101
42
            for (i = 0; i < block_count; i++)
1102
48
                vc1_p_h_intfr_loop_filter(v,
1103
12
                                          i > 3 ? s->dest[i - 3] : dest,
1104
                                          ttblk,
1105
                                          flags,
1106
                                          fieldtx,
1107
                                          i);
1108
        }
1109
    }
1110
48960
}
1111
1112
309240
/**
 * Horizontal loop filter pass for a single block.
 *
 * @param v         decoder context (pq and the DSP function table are read)
 * @param dest      top-left sample of the macroblock plane holding the block
 * @param cbp       per-macroblock coded-block-pattern word; the bits from
 *                  offset block_num * 4 upwards are examined
 * @param ttblk     per-macroblock transform-type word; the 4-bit field at
 *                  bit offset block_num * 4 is examined
 * @param flags     edge flags; only RIGHT_EDGE is examined here
 * @param block_num block index; values above 3 use uvlinesize and take dest
 *                  unmodified, values 0..3 address an 8x8 quadrant of dest
 */
static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
                                                       int *ttblk, uint32_t flags, int block_num)
{
    MpegEncContext *s  = &v->s;
    int pq = v->pq;
    uint8_t *dst;
    uint32_t block_cbp = cbp[0] >> (block_num * 4);
    int tt;
    int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize;

    if (block_num > 3)
        dst = dest;
    else
        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;

    /* Right-hand block edge. At RIGHT_EDGE it is only filtered when
     * (block_num & 5) is zero. The call is the same for every block_num, so
     * no per-plane branch is needed. */
    if (!(flags & RIGHT_EDGE) || !(block_num & 5))
        v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);

    tt = ttblk[0] >> (block_num * 4) & 0xf;
    if (tt == TT_4X4 || tt == TT_4X8) {
        /* Fold cbp bit pairs (0,1) and (2,3) into bits 0 and 2: each 4-line
         * half of the internal edge is filtered when either bit of its pair
         * is set. */
        idx = (block_cbp | (block_cbp >> 1)) & 5;
        if (idx & 1)
            v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
        if (idx & 4)
            v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
    }
}
1143
1144
309240
/**
 * Vertical loop filter pass for a single block.
 *
 * @param v         decoder context (pq and the DSP function table are read)
 * @param dest      top-left sample of the macroblock plane holding the block
 * @param cbp       per-macroblock coded-block-pattern word; the bits from
 *                  offset block_num * 4 upwards are examined
 * @param ttblk     per-macroblock transform-type word; the 4-bit field at
 *                  bit offset block_num * 4 is examined
 * @param flags     edge flags; only BOTTOM_EDGE is examined here
 * @param block_num block index; values above 3 use uvlinesize and take dest
 *                  unmodified, values 0..3 address an 8x8 quadrant of dest
 */
static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
                                                       int *ttblk, uint32_t flags, int block_num)
{
    MpegEncContext *s  = &v->s;
    int pq             = v->pq;
    int shift          = block_num * 4;
    uint32_t block_cbp = cbp[0] >> shift;
    int linesize       = block_num > 3 ? s->uvlinesize : s->linesize;
    uint8_t *dst       = dest;
    int tt, halves;

    /* Blocks 0..3: step to the block's quadrant (8 rows and/or 8 columns). */
    if (block_num <= 3)
        dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;

    /* Bottom block edge: at BOTTOM_EDGE only blocks 0 and 1 are filtered. */
    if (block_num < 2 || !(flags & BOTTOM_EDGE))
        v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);

    tt = (ttblk[0] >> shift) & 0xf;
    if (tt != TT_4X4 && tt != TT_8X4)
        return;

    /* Fold cbp bit pairs (0,2) and (1,3) into bits 0 and 1: each 4-sample
     * half of the internal edge is filtered when either bit of its pair is
     * set. */
    halves = (block_cbp | (block_cbp >> 2)) & 3;
    if (halves & 1)
        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
    if (halves & 2)
        v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
}
1171
1172
51540
/**
 * Loop filter driver called once per decoded macroblock.
 *
 * Runs the vertical filter on the macroblock above the current one (and on
 * the current one when s->mb_y is the last row before s->end_mb_y), then the
 * horizontal filter on the macroblocks one column to the left in those same
 * rows, plus the macroblocks in the current column when s->mb_x is the last
 * column.
 *
 * @param v decoder context; the current position is taken from v->s
 */
void ff_vc1_b_intfi_loop_filter(VC1Context *v)
{
    MpegEncContext *s   = &v->s;
    const int nb_blocks = (CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY)) ? 4 : 6;
    uint8_t  *luma;
    uint32_t *mb_cbp;
    int      *mb_tt;
    uint32_t  edges = 0;
    int       blk;

    /* Inside a macroblock the vertical loop filter must always precede the
     * horizontal one. To guarantee that, the V filter is applied to every
     * applicable horizontal border of the MB above the one being decoded,
     * and the H filter on the vertical borders of that MB is deferred to
     * the next loop filter iteration. As a result the loop filter lags the
     * decoding loop by one row and one column. */

    /* V pass, row above. */
    if (!s->first_slice_line) {
        luma   = s->dest[0] - 16 * s->linesize;
        mb_cbp = &v->cbp[s->mb_x - s->mb_stride];
        mb_tt  = &v->ttblk[s->mb_x - s->mb_stride];
        edges  = 0;
        if (s->mb_y == s->start_mb_y + 1)
            edges = TOP_EDGE;
        for (blk = 0; blk < nb_blocks; blk++) {
            uint8_t *plane = luma;
            if (blk > 3)
                plane = s->dest[blk - 3] - 8 * s->uvlinesize;
            vc1_b_v_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
        }
    }

    /* V pass, current row: only when this is the last row being decoded. */
    if (s->mb_y == s->end_mb_y - 1) {
        luma   = s->dest[0];
        mb_cbp = &v->cbp[s->mb_x];
        mb_tt  = &v->ttblk[s->mb_x];
        edges  = BOTTOM_EDGE;
        if (s->first_slice_line)
            edges |= TOP_EDGE;
        for (blk = 0; blk < nb_blocks; blk++) {
            uint8_t *plane = luma;
            if (blk > 3)
                plane = s->dest[blk - 3];
            vc1_b_v_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
        }
    }

    /* H pass, row above: MB to the upper left, then the MB straight above
     * when the current column is the last one. */
    if (!s->first_slice_line) {
        luma   = s->dest[0] - 16 * s->linesize - 16;
        mb_cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
        mb_tt  = &v->ttblk[s->mb_x - s->mb_stride - 1];
        if (s->mb_x) {
            edges = 0;
            if (s->mb_x == 1)
                edges = LEFT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                uint8_t *plane = luma;
                if (blk > 3)
                    plane = s->dest[blk - 3] - 8 * s->uvlinesize - 8;
                vc1_b_h_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
            }
        }
        if (s->mb_x == s->mb_width - 1) {
            /* Step one MB to the right: from the upper-left MB to the one above. */
            luma += 16;
            mb_cbp++;
            mb_tt++;
            edges = RIGHT_EDGE;
            if (s->mb_x == 0)
                edges |= LEFT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                uint8_t *plane = luma;
                if (blk > 3)
                    plane = s->dest[blk - 3] - 8 * s->uvlinesize;
                vc1_b_h_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
            }
        }
    }

    /* H pass, current row (last row only): MB to the left, then the current
     * MB when the current column is the last one. */
    if (s->mb_y == s->end_mb_y - 1) {
        luma   = s->dest[0] - 16;
        mb_cbp = &v->cbp[s->mb_x - 1];
        mb_tt  = &v->ttblk[s->mb_x - 1];
        if (s->mb_x) {
            edges = 0;
            if (s->mb_x == 1)
                edges = LEFT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                uint8_t *plane = luma;
                if (blk > 3)
                    plane = s->dest[blk - 3] - 8;
                vc1_b_h_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
            }
        }
        if (s->mb_x == s->mb_width - 1) {
            /* Step one MB to the right: from the left MB to the current one. */
            luma += 16;
            mb_cbp++;
            mb_tt++;
            edges = RIGHT_EDGE;
            if (s->mb_x == 0)
                edges |= LEFT_EDGE;
            for (blk = 0; blk < nb_blocks; blk++) {
                uint8_t *plane = luma;
                if (blk > 3)
                    plane = s->dest[blk - 3];
                vc1_b_h_intfi_loop_filter(v, plane, mb_cbp, mb_tt, edges, blk);
            }
        }
    }
}