| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * RTP parser for VP9 payload format (draft version 02) - experimental | ||
| 3 | * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include "libavutil/intreadwrite.h" | ||
| 23 | |||
| 24 | #include "avio_internal.h" | ||
| 25 | #include "rtpdec_formats.h" | ||
| 26 | |||
/* Size in bytes of the mandatory first octet of the VP9 payload descriptor. */
#define RTP_VP9_DESC_REQUIRED_SIZE 1

/* Per-stream reassembly state of the RTP/VP9 depacketizer. */
struct PayloadContext {
    /* dynamic buffer collecting the fragments of the frame in progress;
     * tested against NULL to decide whether a frame has been started */
    AVIOContext *buf;
    /* RTP timestamp recorded when the dynamic buffer was opened */
    uint32_t timestamp;
};
| 33 | |||
| 34 | ✗ | static av_cold int vp9_init(AVFormatContext *ctx, int st_index, | |
| 35 | PayloadContext *data) | ||
| 36 | { | ||
| 37 | ✗ | av_log(ctx, AV_LOG_WARNING, | |
| 38 | "RTP/VP9 support is still experimental\n"); | ||
| 39 | |||
| 40 | ✗ | return 0; | |
| 41 | } | ||
| 42 | |||
| 43 | ✗ | static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, | |
| 44 | AVStream *st, AVPacket *pkt, uint32_t *timestamp, | ||
| 45 | const uint8_t *buf, int len, uint16_t seq, | ||
| 46 | int flags) | ||
| 47 | { | ||
| 48 | int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data; | ||
| 49 | ✗ | av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame; | |
| 50 | ✗ | av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; | |
| 51 | ✗ | int ref_fields = 0, has_ref_field_ext_pic_id = 0; | |
| 52 | int first_fragment, last_fragment; | ||
| 53 | int rtp_m; | ||
| 54 | ✗ | int res = 0; | |
| 55 | |||
| 56 | /* drop data of previous packets in case of non-continuous (lossy) packet stream */ | ||
| 57 | ✗ | if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) | |
| 58 | ✗ | ffio_free_dyn_buf(&rtp_vp9_ctx->buf); | |
| 59 | |||
| 60 | /* sanity check for size of input packet: 1 byte payload at least */ | ||
| 61 | ✗ | if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { | |
| 62 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); | |
| 63 | ✗ | return AVERROR_INVALIDDATA; | |
| 64 | } | ||
| 65 | |||
| 66 | /* | ||
| 67 | * decode the required VP9 payload descriptor according to section 4.2 of the spec.: | ||
| 68 | * | ||
| 69 | * 0 1 2 3 4 5 6 7 | ||
| 70 | * +-+-+-+-+-+-+-+-+ | ||
| 71 | * |I|P|L|F|B|E|V|-| (REQUIRED) | ||
| 72 | * +-+-+-+-+-+-+-+-+ | ||
| 73 | * | ||
| 74 | * I: PictureID present | ||
| 75 | * P: Inter-picture predicted layer frame | ||
| 76 | * L: Layer indices present | ||
| 77 | * F: Flexible mode | ||
| 78 | * B: Start of VP9 frame | ||
| 79 | * E: End of picture | ||
| 80 | * V: Scalability Structure (SS) present | ||
| 81 | */ | ||
| 82 | ✗ | has_pic_id = !!(buf[0] & 0x80); | |
| 83 | ✗ | inter_picture_layer_frame = !!(buf[0] & 0x40); | |
| 84 | ✗ | has_layer_idc = !!(buf[0] & 0x20); | |
| 85 | ✗ | has_ref_idc = !!(buf[0] & 0x10); | |
| 86 | ✗ | first_fragment = !!(buf[0] & 0x08); | |
| 87 | ✗ | last_fragment = !!(buf[0] & 0x04); | |
| 88 | ✗ | has_ss_data = !!(buf[0] & 0x02); | |
| 89 | |||
| 90 | ✗ | rtp_m = !!(flags & RTP_FLAG_MARKER); | |
| 91 | |||
| 92 | /* sanity check for markers: B should always be equal to the RTP M marker */ | ||
| 93 | ✗ | if (last_fragment != rtp_m) { | |
| 94 | ✗ | av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m); | |
| 95 | ✗ | return AVERROR_INVALIDDATA; | |
| 96 | } | ||
| 97 | |||
| 98 | /* pass the extensions field */ | ||
| 99 | ✗ | buf += RTP_VP9_DESC_REQUIRED_SIZE; | |
| 100 | ✗ | len -= RTP_VP9_DESC_REQUIRED_SIZE; | |
| 101 | |||
| 102 | /* | ||
| 103 | * decode the 1-byte/2-byte picture ID: | ||
| 104 | * | ||
| 105 | * 0 1 2 3 4 5 6 7 | ||
| 106 | * +-+-+-+-+-+-+-+-+ | ||
| 107 | * I: |M|PICTURE ID | (RECOMMENDED) | ||
| 108 | * +-+-+-+-+-+-+-+-+ | ||
| 109 | * M: | EXTENDED PID | (RECOMMENDED) | ||
| 110 | * +-+-+-+-+-+-+-+-+ | ||
| 111 | * | ||
| 112 | * M: The most significant bit of the first octet is an extension flag. | ||
| 113 | * PictureID: 8 or 16 bits including the M bit. | ||
| 114 | */ | ||
| 115 | ✗ | if (has_pic_id) { | |
| 116 | /* check for 1-byte or 2-byte picture index */ | ||
| 117 | ✗ | if (buf[0] & 0x80) { | |
| 118 | ✗ | if (len < 2) { | |
| 119 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 120 | ✗ | return AVERROR_INVALIDDATA; | |
| 121 | } | ||
| 122 | ✗ | pic_id = AV_RB16(buf) & 0x7fff; | |
| 123 | ✗ | buf += 2; | |
| 124 | ✗ | len -= 2; | |
| 125 | } else { | ||
| 126 | ✗ | pic_id = buf[0] & 0x7f; | |
| 127 | ✗ | buf++; | |
| 128 | ✗ | len--; | |
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | /* | ||
| 133 | * decode layer indices | ||
| 134 | * | ||
| 135 | * 0 1 2 3 4 5 6 7 | ||
| 136 | * +-+-+-+-+-+-+-+-+ | ||
| 137 | * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) | ||
| 138 | * +-+-+-+-+-+-+-+-+ | ||
| 139 | * | ||
| 140 | * T, S and Q are 2-bit indices for temporal, spatial, and quality layers. | ||
| 141 | * If "F" is set in the initial octet, R is 2 bits representing the number | ||
| 142 | * of reference fields this frame refers to. | ||
| 143 | */ | ||
| 144 | ✗ | if (has_layer_idc) { | |
| 145 | ✗ | if (len < 1) { | |
| 146 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 147 | ✗ | return AVERROR_INVALIDDATA; | |
| 148 | } | ||
| 149 | ✗ | layer_temporal = buf[0] & 0xC0; | |
| 150 | ✗ | layer_spatial = buf[0] & 0x30; | |
| 151 | ✗ | layer_quality = buf[0] & 0x0C; | |
| 152 | ✗ | if (has_ref_idc) { | |
| 153 | ✗ | ref_fields = buf[0] & 0x03; | |
| 154 | ✗ | if (ref_fields) | |
| 155 | ✗ | non_key_frame = 1; | |
| 156 | } | ||
| 157 | ✗ | buf++; | |
| 158 | ✗ | len--; | |
| 159 | } | ||
| 160 | |||
| 161 | /* | ||
| 162 | * decode the reference fields | ||
| 163 | * | ||
| 164 | * 0 1 2 3 4 5 6 7 | ||
| 165 | * +-+-+-+-+-+-+-+-+ -\ | ||
| 166 | * F: | PID |X| RS| RQ| (OPTIONAL) . | ||
| 167 | * +-+-+-+-+-+-+-+-+ . - R times | ||
| 168 | * X: | EXTENDED PID | (OPTIONAL) . | ||
| 169 | * +-+-+-+-+-+-+-+-+ -/ | ||
| 170 | * | ||
| 171 | * PID: The relative Picture ID referred to by this frame. | ||
| 172 | * RS and RQ: The spatial and quality layer IDs. | ||
| 173 | * X: 1 if this layer index has an extended relative Picture ID. | ||
| 174 | */ | ||
| 175 | ✗ | if (has_ref_idc) { | |
| 176 | ✗ | while (ref_fields) { | |
| 177 | ✗ | if (len < 1) { | |
| 178 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 179 | ✗ | return AVERROR_INVALIDDATA; | |
| 180 | } | ||
| 181 | |||
| 182 | ✗ | has_ref_field_ext_pic_id = buf[0] & 0x10; | |
| 183 | |||
| 184 | /* pass ref. field */ | ||
| 185 | ✗ | if (has_ref_field_ext_pic_id) { | |
| 186 | ✗ | if (len < 2) { | |
| 187 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 188 | ✗ | return AVERROR_INVALIDDATA; | |
| 189 | } | ||
| 190 | |||
| 191 | /* ignore ref. data */ | ||
| 192 | |||
| 193 | ✗ | buf += 2; | |
| 194 | ✗ | len -= 2; | |
| 195 | } else { | ||
| 196 | |||
| 197 | /* ignore ref. data */ | ||
| 198 | |||
| 199 | ✗ | buf++; | |
| 200 | ✗ | len--; | |
| 201 | } | ||
| 202 | ✗ | ref_fields--; | |
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | /* | ||
| 207 | * decode the scalability structure (SS) | ||
| 208 | * | ||
| 209 | * 0 1 2 3 4 5 6 7 | ||
| 210 | * +-+-+-+-+-+-+-+-+ | ||
| 211 | * V: | PATTERN LENGTH| | ||
| 212 | * +-+-+-+-+-+-+-+-+ -\ | ||
| 213 | * | T | S | Q | R | (OPTIONAL) . | ||
| 214 | * +-+-+-+-+-+-+-+-+ -\ . | ||
| 215 | * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times | ||
| 216 | * +-+-+-+-+-+-+-+-+ . - R times . | ||
| 217 | * X: | EXTENDED PID | (OPTIONAL) . . | ||
| 218 | * +-+-+-+-+-+-+-+-+ -/ -/ | ||
| 219 | * | ||
| 220 | * PID: The relative Picture ID referred to by this frame. | ||
| 221 | * RS and RQ: The spatial and quality layer IDs. | ||
| 222 | * X: 1 if this layer index has an extended relative Picture ID. | ||
| 223 | */ | ||
| 224 | ✗ | if (has_ss_data) { | |
| 225 | int n_s, y, g, i; | ||
| 226 | ✗ | if (len < 1) { | |
| 227 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 228 | ✗ | return AVERROR_INVALIDDATA; | |
| 229 | } | ||
| 230 | ✗ | n_s = buf[0] >> 5; | |
| 231 | ✗ | y = !!(buf[0] & 0x10); | |
| 232 | ✗ | g = !!(buf[0] & 0x08); | |
| 233 | ✗ | buf++; | |
| 234 | ✗ | len--; | |
| 235 | ✗ | if (n_s > 0) { | |
| 236 | ✗ | avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers"); | |
| 237 | ✗ | return AVERROR_PATCHWELCOME; | |
| 238 | } | ||
| 239 | ✗ | if (y) { | |
| 240 | ✗ | if (len < 4 * (n_s + 1)) { | |
| 241 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 242 | ✗ | return AVERROR_INVALIDDATA; | |
| 243 | } | ||
| 244 | ✗ | for (i = 0; i < n_s + 1; i++) { | |
| 245 | av_unused int w, h; | ||
| 246 | ✗ | w = AV_RB16(buf); | |
| 247 | ✗ | h = AV_RB16(buf + 2); | |
| 248 | ✗ | buf += 4; | |
| 249 | ✗ | len -= 4; | |
| 250 | } | ||
| 251 | } | ||
| 252 | ✗ | if (g) { | |
| 253 | int n_g; | ||
| 254 | ✗ | if (len < 1) { | |
| 255 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 256 | ✗ | return AVERROR_INVALIDDATA; | |
| 257 | } | ||
| 258 | ✗ | n_g = buf[0]; | |
| 259 | ✗ | buf++; | |
| 260 | ✗ | len--; | |
| 261 | ✗ | for (i = 0; i < n_g; i++) { | |
| 262 | av_unused int t, u, r, j; | ||
| 263 | ✗ | if (len < 1) { | |
| 264 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 265 | ✗ | return AVERROR_INVALIDDATA; | |
| 266 | } | ||
| 267 | ✗ | t = buf[0] >> 5; | |
| 268 | ✗ | u = !!(buf[0] & 0x10); | |
| 269 | ✗ | r = (buf[0] >> 2) & 0x03; | |
| 270 | ✗ | buf++; | |
| 271 | ✗ | len--; | |
| 272 | ✗ | if (len < r) { | |
| 273 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 274 | ✗ | return AVERROR_INVALIDDATA; | |
| 275 | } | ||
| 276 | ✗ | for (j = 0; j < r; j++) { | |
| 277 | ✗ | av_unused int p_diff = buf[0]; | |
| 278 | ✗ | buf++; | |
| 279 | ✗ | len--; | |
| 280 | } | ||
| 281 | } | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | /* | ||
| 286 | * decode the VP9 payload header | ||
| 287 | * | ||
| 288 | * spec. is tbd | ||
| 289 | */ | ||
| 290 | //XXX: implement when specified | ||
| 291 | |||
| 292 | /* sanity check: 1 byte payload as minimum */ | ||
| 293 | ✗ | if (len < 1) { | |
| 294 | ✗ | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); | |
| 295 | ✗ | return AVERROR_INVALIDDATA; | |
| 296 | } | ||
| 297 | |||
| 298 | /* start frame buffering with new dynamic buffer */ | ||
| 299 | ✗ | if (!rtp_vp9_ctx->buf) { | |
| 300 | /* sanity check: a new frame should have started */ | ||
| 301 | ✗ | if (first_fragment) { | |
| 302 | ✗ | res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); | |
| 303 | ✗ | if (res < 0) | |
| 304 | ✗ | return res; | |
| 305 | /* update the timestamp in the frame packet with the one from the RTP packet */ | ||
| 306 | ✗ | rtp_vp9_ctx->timestamp = *timestamp; | |
| 307 | } else { | ||
| 308 | /* frame not started yet, need more packets */ | ||
| 309 | ✗ | return AVERROR(EAGAIN); | |
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 | /* write the fragment to the dyn. buffer */ | ||
| 314 | ✗ | avio_write(rtp_vp9_ctx->buf, buf, len); | |
| 315 | |||
| 316 | /* do we need more fragments? */ | ||
| 317 | ✗ | if (!last_fragment) | |
| 318 | ✗ | return AVERROR(EAGAIN); | |
| 319 | |||
| 320 | /* close frame buffering and create resulting A/V packet */ | ||
| 321 | ✗ | res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); | |
| 322 | ✗ | if (res < 0) | |
| 323 | ✗ | return res; | |
| 324 | |||
| 325 | ✗ | return 0; | |
| 326 | } | ||
| 327 | |||
| 328 | ✗ | static void vp9_close_context(PayloadContext *vp9) | |
| 329 | { | ||
| 330 | ✗ | ffio_free_dyn_buf(&vp9->buf); | |
| 331 | ✗ | } | |
| 332 | |||
| 333 | const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { | ||
| 334 | .enc_name = "VP9", | ||
| 335 | .codec_type = AVMEDIA_TYPE_VIDEO, | ||
| 336 | .codec_id = AV_CODEC_ID_VP9, | ||
| 337 | .priv_data_size = sizeof(PayloadContext), | ||
| 338 | .init = vp9_init, | ||
| 339 | .close = vp9_close_context, | ||
| 340 | .parse_packet = vp9_handle_packet | ||
| 341 | }; | ||
| 342 |