Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* RTP parser for VP9 payload format (draft version 02) - experimental |
3 |
|
|
* Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com> |
4 |
|
|
* |
5 |
|
|
* This file is part of FFmpeg. |
6 |
|
|
* |
7 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
8 |
|
|
* modify it under the terms of the GNU Lesser General Public |
9 |
|
|
* License as published by the Free Software Foundation; either |
10 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 |
|
|
* Lesser General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU Lesser General Public |
18 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
#include "libavutil/intreadwrite.h" |
23 |
|
|
|
24 |
|
|
#include "avio_internal.h" |
25 |
|
|
#include "rtpdec_formats.h" |
26 |
|
|
|
27 |
|
|
#define RTP_VP9_DESC_REQUIRED_SIZE 1 |
28 |
|
|
|
29 |
|
|
struct PayloadContext { |
30 |
|
|
AVIOContext *buf; |
31 |
|
|
uint32_t timestamp; |
32 |
|
|
}; |
33 |
|
|
|
34 |
|
✗ |
static av_cold int vp9_init(AVFormatContext *ctx, int st_index, |
35 |
|
|
PayloadContext *data) |
36 |
|
|
{ |
37 |
|
✗ |
av_log(ctx, AV_LOG_WARNING, |
38 |
|
|
"RTP/VP9 support is still experimental\n"); |
39 |
|
|
|
40 |
|
✗ |
return 0; |
41 |
|
|
} |
42 |
|
|
|
43 |
|
✗ |
static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, |
44 |
|
|
AVStream *st, AVPacket *pkt, uint32_t *timestamp, |
45 |
|
|
const uint8_t *buf, int len, uint16_t seq, |
46 |
|
|
int flags) |
47 |
|
|
{ |
48 |
|
|
int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data; |
49 |
|
✗ |
av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame; |
50 |
|
✗ |
av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; |
51 |
|
✗ |
int ref_fields = 0, has_ref_field_ext_pic_id = 0; |
52 |
|
|
int first_fragment, last_fragment; |
53 |
|
|
int rtp_m; |
54 |
|
✗ |
int res = 0; |
55 |
|
|
|
56 |
|
|
/* drop data of previous packets in case of non-continuous (lossy) packet stream */ |
57 |
|
✗ |
if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) |
58 |
|
✗ |
ffio_free_dyn_buf(&rtp_vp9_ctx->buf); |
59 |
|
|
|
60 |
|
|
/* sanity check for size of input packet: 1 byte payload at least */ |
61 |
|
✗ |
if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { |
62 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); |
63 |
|
✗ |
return AVERROR_INVALIDDATA; |
64 |
|
|
} |
65 |
|
|
|
66 |
|
|
/* |
67 |
|
|
* decode the required VP9 payload descriptor according to section 4.2 of the spec.: |
68 |
|
|
* |
69 |
|
|
* 0 1 2 3 4 5 6 7 |
70 |
|
|
* +-+-+-+-+-+-+-+-+ |
71 |
|
|
* |I|P|L|F|B|E|V|-| (REQUIRED) |
72 |
|
|
* +-+-+-+-+-+-+-+-+ |
73 |
|
|
* |
74 |
|
|
* I: PictureID present |
75 |
|
|
* P: Inter-picture predicted layer frame |
76 |
|
|
* L: Layer indices present |
77 |
|
|
* F: Flexible mode |
78 |
|
|
* B: Start of VP9 frame |
79 |
|
|
* E: End of picture |
80 |
|
|
* V: Scalability Structure (SS) present |
81 |
|
|
*/ |
82 |
|
✗ |
has_pic_id = !!(buf[0] & 0x80); |
83 |
|
✗ |
inter_picture_layer_frame = !!(buf[0] & 0x40); |
84 |
|
✗ |
has_layer_idc = !!(buf[0] & 0x20); |
85 |
|
✗ |
has_ref_idc = !!(buf[0] & 0x10); |
86 |
|
✗ |
first_fragment = !!(buf[0] & 0x08); |
87 |
|
✗ |
last_fragment = !!(buf[0] & 0x04); |
88 |
|
✗ |
has_ss_data = !!(buf[0] & 0x02); |
89 |
|
|
|
90 |
|
✗ |
rtp_m = !!(flags & RTP_FLAG_MARKER); |
91 |
|
|
|
92 |
|
|
/* sanity check for markers: B should always be equal to the RTP M marker */ |
93 |
|
✗ |
if (last_fragment != rtp_m) { |
94 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m); |
95 |
|
✗ |
return AVERROR_INVALIDDATA; |
96 |
|
|
} |
97 |
|
|
|
98 |
|
|
/* pass the extensions field */ |
99 |
|
✗ |
buf += RTP_VP9_DESC_REQUIRED_SIZE; |
100 |
|
✗ |
len -= RTP_VP9_DESC_REQUIRED_SIZE; |
101 |
|
|
|
102 |
|
|
/* |
103 |
|
|
* decode the 1-byte/2-byte picture ID: |
104 |
|
|
* |
105 |
|
|
* 0 1 2 3 4 5 6 7 |
106 |
|
|
* +-+-+-+-+-+-+-+-+ |
107 |
|
|
* I: |M|PICTURE ID | (RECOMMENDED) |
108 |
|
|
* +-+-+-+-+-+-+-+-+ |
109 |
|
|
* M: | EXTENDED PID | (RECOMMENDED) |
110 |
|
|
* +-+-+-+-+-+-+-+-+ |
111 |
|
|
* |
112 |
|
|
* M: The most significant bit of the first octet is an extension flag. |
113 |
|
|
* PictureID: 8 or 16 bits including the M bit. |
114 |
|
|
*/ |
115 |
|
✗ |
if (has_pic_id) { |
116 |
|
|
/* check for 1-byte or 2-byte picture index */ |
117 |
|
✗ |
if (buf[0] & 0x80) { |
118 |
|
✗ |
if (len < 2) { |
119 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
120 |
|
✗ |
return AVERROR_INVALIDDATA; |
121 |
|
|
} |
122 |
|
✗ |
pic_id = AV_RB16(buf) & 0x7fff; |
123 |
|
✗ |
buf += 2; |
124 |
|
✗ |
len -= 2; |
125 |
|
|
} else { |
126 |
|
✗ |
pic_id = buf[0] & 0x7f; |
127 |
|
✗ |
buf++; |
128 |
|
✗ |
len--; |
129 |
|
|
} |
130 |
|
|
} |
131 |
|
|
|
132 |
|
|
/* |
133 |
|
|
* decode layer indices |
134 |
|
|
* |
135 |
|
|
* 0 1 2 3 4 5 6 7 |
136 |
|
|
* +-+-+-+-+-+-+-+-+ |
137 |
|
|
* L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) |
138 |
|
|
* +-+-+-+-+-+-+-+-+ |
139 |
|
|
* |
140 |
|
|
* T, S and Q are 2-bit indices for temporal, spatial, and quality layers. |
141 |
|
|
* If "F" is set in the initial octet, R is 2 bits representing the number |
142 |
|
|
* of reference fields this frame refers to. |
143 |
|
|
*/ |
144 |
|
✗ |
if (has_layer_idc) { |
145 |
|
✗ |
if (len < 1) { |
146 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
147 |
|
✗ |
return AVERROR_INVALIDDATA; |
148 |
|
|
} |
149 |
|
✗ |
layer_temporal = buf[0] & 0xC0; |
150 |
|
✗ |
layer_spatial = buf[0] & 0x30; |
151 |
|
✗ |
layer_quality = buf[0] & 0x0C; |
152 |
|
✗ |
if (has_ref_idc) { |
153 |
|
✗ |
ref_fields = buf[0] & 0x03; |
154 |
|
✗ |
if (ref_fields) |
155 |
|
✗ |
non_key_frame = 1; |
156 |
|
|
} |
157 |
|
✗ |
buf++; |
158 |
|
✗ |
len--; |
159 |
|
|
} |
160 |
|
|
|
161 |
|
|
/* |
162 |
|
|
* decode the reference fields |
163 |
|
|
* |
164 |
|
|
* 0 1 2 3 4 5 6 7 |
165 |
|
|
* +-+-+-+-+-+-+-+-+ -\ |
166 |
|
|
* F: | PID |X| RS| RQ| (OPTIONAL) . |
167 |
|
|
* +-+-+-+-+-+-+-+-+ . - R times |
168 |
|
|
* X: | EXTENDED PID | (OPTIONAL) . |
169 |
|
|
* +-+-+-+-+-+-+-+-+ -/ |
170 |
|
|
* |
171 |
|
|
* PID: The relative Picture ID referred to by this frame. |
172 |
|
|
* RS and RQ: The spatial and quality layer IDs. |
173 |
|
|
* X: 1 if this layer index has an extended relative Picture ID. |
174 |
|
|
*/ |
175 |
|
✗ |
if (has_ref_idc) { |
176 |
|
✗ |
while (ref_fields) { |
177 |
|
✗ |
if (len < 1) { |
178 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
179 |
|
✗ |
return AVERROR_INVALIDDATA; |
180 |
|
|
} |
181 |
|
|
|
182 |
|
✗ |
has_ref_field_ext_pic_id = buf[0] & 0x10; |
183 |
|
|
|
184 |
|
|
/* pass ref. field */ |
185 |
|
✗ |
if (has_ref_field_ext_pic_id) { |
186 |
|
✗ |
if (len < 2) { |
187 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
188 |
|
✗ |
return AVERROR_INVALIDDATA; |
189 |
|
|
} |
190 |
|
|
|
191 |
|
|
/* ignore ref. data */ |
192 |
|
|
|
193 |
|
✗ |
buf += 2; |
194 |
|
✗ |
len -= 2; |
195 |
|
|
} else { |
196 |
|
|
|
197 |
|
|
/* ignore ref. data */ |
198 |
|
|
|
199 |
|
✗ |
buf++; |
200 |
|
✗ |
len--; |
201 |
|
|
} |
202 |
|
✗ |
ref_fields--; |
203 |
|
|
} |
204 |
|
|
} |
205 |
|
|
|
206 |
|
|
/* |
207 |
|
|
* decode the scalability structure (SS) |
208 |
|
|
* |
209 |
|
|
* 0 1 2 3 4 5 6 7 |
210 |
|
|
* +-+-+-+-+-+-+-+-+ |
211 |
|
|
* V: | PATTERN LENGTH| |
212 |
|
|
* +-+-+-+-+-+-+-+-+ -\ |
213 |
|
|
* | T | S | Q | R | (OPTIONAL) . |
214 |
|
|
* +-+-+-+-+-+-+-+-+ -\ . |
215 |
|
|
* | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times |
216 |
|
|
* +-+-+-+-+-+-+-+-+ . - R times . |
217 |
|
|
* X: | EXTENDED PID | (OPTIONAL) . . |
218 |
|
|
* +-+-+-+-+-+-+-+-+ -/ -/ |
219 |
|
|
* |
220 |
|
|
* PID: The relative Picture ID referred to by this frame. |
221 |
|
|
* RS and RQ: The spatial and quality layer IDs. |
222 |
|
|
* X: 1 if this layer index has an extended relative Picture ID. |
223 |
|
|
*/ |
224 |
|
✗ |
if (has_ss_data) { |
225 |
|
|
int n_s, y, g, i; |
226 |
|
✗ |
if (len < 1) { |
227 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
228 |
|
✗ |
return AVERROR_INVALIDDATA; |
229 |
|
|
} |
230 |
|
✗ |
n_s = buf[0] >> 5; |
231 |
|
✗ |
y = !!(buf[0] & 0x10); |
232 |
|
✗ |
g = !!(buf[0] & 0x08); |
233 |
|
✗ |
buf++; |
234 |
|
✗ |
len--; |
235 |
|
✗ |
if (n_s > 0) { |
236 |
|
✗ |
avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers"); |
237 |
|
✗ |
return AVERROR_PATCHWELCOME; |
238 |
|
|
} |
239 |
|
✗ |
if (y) { |
240 |
|
✗ |
if (len < 4 * (n_s + 1)) { |
241 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
242 |
|
✗ |
return AVERROR_INVALIDDATA; |
243 |
|
|
} |
244 |
|
✗ |
for (i = 0; i < n_s + 1; i++) { |
245 |
|
|
av_unused int w, h; |
246 |
|
✗ |
w = AV_RB16(buf); |
247 |
|
✗ |
h = AV_RB16(buf + 2); |
248 |
|
✗ |
buf += 4; |
249 |
|
✗ |
len -= 4; |
250 |
|
|
} |
251 |
|
|
} |
252 |
|
✗ |
if (g) { |
253 |
|
|
int n_g; |
254 |
|
✗ |
if (len < 1) { |
255 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
256 |
|
✗ |
return AVERROR_INVALIDDATA; |
257 |
|
|
} |
258 |
|
✗ |
n_g = buf[0]; |
259 |
|
✗ |
buf++; |
260 |
|
✗ |
len--; |
261 |
|
✗ |
for (i = 0; i < n_g; i++) { |
262 |
|
|
av_unused int t, u, r, j; |
263 |
|
✗ |
if (len < 1) { |
264 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
265 |
|
✗ |
return AVERROR_INVALIDDATA; |
266 |
|
|
} |
267 |
|
✗ |
t = buf[0] >> 5; |
268 |
|
✗ |
u = !!(buf[0] & 0x10); |
269 |
|
✗ |
r = (buf[0] >> 2) & 0x03; |
270 |
|
✗ |
buf++; |
271 |
|
✗ |
len--; |
272 |
|
✗ |
if (len < r) { |
273 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
274 |
|
✗ |
return AVERROR_INVALIDDATA; |
275 |
|
|
} |
276 |
|
✗ |
for (j = 0; j < r; j++) { |
277 |
|
✗ |
av_unused int p_diff = buf[0]; |
278 |
|
✗ |
buf++; |
279 |
|
✗ |
len--; |
280 |
|
|
} |
281 |
|
|
} |
282 |
|
|
} |
283 |
|
|
} |
284 |
|
|
|
285 |
|
|
/* |
286 |
|
|
* decode the VP9 payload header |
287 |
|
|
* |
288 |
|
|
* spec. is tbd |
289 |
|
|
*/ |
290 |
|
|
//XXX: implement when specified |
291 |
|
|
|
292 |
|
|
/* sanity check: 1 byte payload as minimum */ |
293 |
|
✗ |
if (len < 1) { |
294 |
|
✗ |
av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
295 |
|
✗ |
return AVERROR_INVALIDDATA; |
296 |
|
|
} |
297 |
|
|
|
298 |
|
|
/* start frame buffering with new dynamic buffer */ |
299 |
|
✗ |
if (!rtp_vp9_ctx->buf) { |
300 |
|
|
/* sanity check: a new frame should have started */ |
301 |
|
✗ |
if (first_fragment) { |
302 |
|
✗ |
res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); |
303 |
|
✗ |
if (res < 0) |
304 |
|
✗ |
return res; |
305 |
|
|
/* update the timestamp in the frame packet with the one from the RTP packet */ |
306 |
|
✗ |
rtp_vp9_ctx->timestamp = *timestamp; |
307 |
|
|
} else { |
308 |
|
|
/* frame not started yet, need more packets */ |
309 |
|
✗ |
return AVERROR(EAGAIN); |
310 |
|
|
} |
311 |
|
|
} |
312 |
|
|
|
313 |
|
|
/* write the fragment to the dyn. buffer */ |
314 |
|
✗ |
avio_write(rtp_vp9_ctx->buf, buf, len); |
315 |
|
|
|
316 |
|
|
/* do we need more fragments? */ |
317 |
|
✗ |
if (!last_fragment) |
318 |
|
✗ |
return AVERROR(EAGAIN); |
319 |
|
|
|
320 |
|
|
/* close frame buffering and create resulting A/V packet */ |
321 |
|
✗ |
res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); |
322 |
|
✗ |
if (res < 0) |
323 |
|
✗ |
return res; |
324 |
|
|
|
325 |
|
✗ |
return 0; |
326 |
|
|
} |
327 |
|
|
|
328 |
|
✗ |
/*
 * Handler close hook: hands the frame buffer of the payload context to
 * ffio_free_dyn_buf() so that a partially collected frame is discarded.
 */
static void vp9_close_context(PayloadContext *vp9)
{
    ffio_free_dyn_buf(&vp9->buf);
}
332 |
|
|
|
333 |
|
|
const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { |
334 |
|
|
.enc_name = "VP9", |
335 |
|
|
.codec_type = AVMEDIA_TYPE_VIDEO, |
336 |
|
|
.codec_id = AV_CODEC_ID_VP9, |
337 |
|
|
.priv_data_size = sizeof(PayloadContext), |
338 |
|
|
.init = vp9_init, |
339 |
|
|
.close = vp9_close_context, |
340 |
|
|
.parse_packet = vp9_handle_packet |
341 |
|
|
}; |
342 |
|
|
|