Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org> | ||
3 | * Copyright (c) 2017 Clément Bœsch <u@pkh.me> | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | #include "libavutil/avassert.h" | ||
23 | #include "libavutil/avstring.h" | ||
24 | #include "libavutil/common.h" | ||
25 | #include "libavutil/parseutils.h" | ||
26 | #include "htmlsubtitles.h" | ||
27 | #include <ctype.h> | ||
28 | |||
/**
 * Parse the value of an HTML "color" attribute.
 *
 * A run of several leading '#' characters is collapsed so that only the last
 * one is handed to av_parse_color(). The value is taken to end at the first
 * '"', ' ' or '>' character, or at the end of the string.
 *
 * @param log_ctx logging context forwarded to av_parse_color()
 * @param str     start of the attribute value
 * @return rgba[0] | rgba[1] << 8 | rgba[2] << 16 built from the parsed
 *         components, or -1 if av_parse_color() returns a negative value
 */
static int html_color_parse(void *log_ctx, const char *str)
{
    uint8_t rgba[4];
    const char *value = str;
    size_t value_len;

    /* Step over every '#' that is directly followed by another '#'. */
    while (value[0] == '#' && value[1] == '#')
        value++;

    value_len = strcspn(value, "\" >");
    if (av_parse_color(rgba, value, value_len, log_ctx) < 0)
        return -1;

    return rgba[0] | rgba[1] << 8 | rgba[2] << 16;
}
40 | |||
41 | 934 | static void rstrip_spaces_buf(AVBPrint *buf) | |
42 | { | ||
43 |
1/2✓ Branch 1 taken 934 times.
✗ Branch 2 not taken.
|
934 | if (av_bprint_is_complete(buf)) |
44 |
4/4✓ Branch 0 taken 1085 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 152 times.
✓ Branch 3 taken 933 times.
|
1086 | while (buf->len > 0 && buf->str[buf->len - 1] == ' ') |
45 | 152 | buf->str[--buf->len] = 0; | |
46 | 934 | } | |
47 | |||
/*
 * Tell whether the text starts with an alignment override of the exact form
 * "{\anD}", where D is a single decimal digit.
 *
 * Hand-written fast path standing in for this sscanf call:
 *
 *     sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int scanbraces(const char *in)
{
    /* Short-circuit evaluation keeps every read inside the string: in[4] is
     * only inspected after four non-NUL characters matched, and in[5] only
     * after in[4] turned out to be a digit. */
    return !strncmp(in, "{\\an", 4) && av_isdigit(in[4]) && in[5] == '}';
}
66 | |||
67 | /* skip all {\xxx} substrings except for {\an%d} | ||
68 | and all microdvd like styles such as {Y:xxx} */ | ||
69 | 128 | static void handle_open_brace(AVBPrint *dst, const char **inp, int *an, int *closing_brace_missing) | |
70 | { | ||
71 | 128 | const char *in = *inp; | |
72 | |||
73 | 128 | *an += scanbraces(in); | |
74 | |||
75 |
1/2✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
|
128 | if (!*closing_brace_missing) { |
76 |
4/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 96 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 16 times.
|
128 | if ( (*an != 1 && in[1] == '\\') |
77 |
4/6✓ Branch 0 taken 112 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 104 times.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
|
112 | || (in[1] && strchr("CcFfoPSsYy", in[1]) && in[2] == ':')) { |
78 | 24 | char *bracep = strchr(in+2, '}'); | |
79 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (bracep) { |
80 | 24 | *inp = bracep; | |
81 | 24 | return; | |
82 | } else | ||
83 | ✗ | *closing_brace_missing = 1; | |
84 | } | ||
85 | } | ||
86 | |||
87 | 104 | av_bprint_chars(dst, *in, 1); | |
88 | } | ||
89 | |||
/* Font attributes in effect at one <font> nesting level. */
struct font_tag {
    char face[128];     /* face name; empty string when no face is set */
    int size;           /* font size; 0 when no size is set */
    uint32_t color;     /* low 24 bits: color value; the top byte is set to
                         * 0xff once a color has been given */
};
95 | |||
/*
 * Fast code for scanning the rest of a tag, standing in for a
 * sscanf(in, "%127[^<>]>%n", buffer, lenp) based check.
 *
 * Copies up to 127 characters from in to buffer until a '>' is found. On
 * success the copy is NUL-terminated, *lenp receives the number of input
 * characters consumed including the '>', and 1 is returned. Unlike a scanset
 * conversion, an immediate '>' is accepted and yields an empty string.
 *
 * Returns 0, leaving *lenp untouched, when the end of the string or a '<' is
 * reached first, or when no '>' shows up within the first 128 characters.
 * buffer must have room for 128 bytes; its contents are unspecified on
 * failure.
 */
static int scantag(const char *in, char *buffer, int *lenp)
{
    int pos = 0;

    while (pos < 128) {
        const char ch = in[pos];

        if (ch == '\0' || ch == '<')
            return 0;

        if (ch == '>') {
            buffer[pos] = '\0';
            *lenp = pos + 1;
            return 1;
        }

        buffer[pos++] = ch;
    }

    return 0;
}
123 | |||
124 | /* | ||
125 | * The general politic of the convert is to mask unsupported tags or formatting | ||
126 | * errors (but still alert the user/subtitles writer with an error/warning) | ||
127 | * without dropping any actual text content for the final user. | ||
128 | */ | ||
129 | 411 | int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in) | |
130 | { | ||
131 | char *param, buffer[128]; | ||
132 | 411 | int len, tag_close, sptr = 0, line_start = 1, an = 0, end = 0; | |
133 | 411 | int closing_brace_missing = 0; | |
134 | int i, likely_a_tag; | ||
135 | |||
136 | /* | ||
137 | * state stack is only present for fonts since they are the only tags where | ||
138 | * the state is not binary. Here is a typical use case: | ||
139 | * | ||
140 | * <font color="red" size=10> | ||
141 | * red 10 | ||
142 | * <font size=50> RED AND BIG </font> | ||
143 | * red 10 again | ||
144 | * </font> | ||
145 | * | ||
146 | * On the other hand, using the state system for all the tags should be | ||
147 | * avoided because it breaks wrongly nested tags such as: | ||
148 | * | ||
149 | * <b> foo <i> bar </b> bla </i> | ||
150 | * | ||
151 | * We don't want to break here; instead, we will treat all these tags as | ||
152 | * binary state markers. Basically, "<b>" will activate bold, and "</b>" | ||
153 | * will deactivate it, whatever the current state. | ||
154 | * | ||
155 | * This will also prevents cases where we have a random closing tag | ||
156 | * remaining after the opening one was dropped. Yes, this happens and we | ||
157 | * still don't want to print a "</b>" at the end of the dialog event. | ||
158 | */ | ||
159 | struct font_tag stack[16]; | ||
160 | |||
161 | 411 | memset(&stack[0], 0, sizeof(stack[0])); | |
162 | |||
163 |
3/4✓ Branch 0 taken 34008 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 33597 times.
✓ Branch 3 taken 411 times.
|
34008 | for (; !end && *in; in++) { |
164 |
5/6✗ Branch 0 not taken.
✓ Branch 1 taken 523 times.
✓ Branch 2 taken 4783 times.
✓ Branch 3 taken 128 times.
✓ Branch 4 taken 762 times.
✓ Branch 5 taken 27401 times.
|
33597 | switch (*in) { |
165 | ✗ | case '\r': | |
166 | ✗ | break; | |
167 | 523 | case '\n': | |
168 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 523 times.
|
523 | if (line_start) { |
169 | ✗ | end = 1; | |
170 | ✗ | break; | |
171 | } | ||
172 | 523 | rstrip_spaces_buf(dst); | |
173 | 523 | av_bprintf(dst, "\\N"); | |
174 | 523 | line_start = 1; | |
175 | 523 | break; | |
176 | 4783 | case ' ': | |
177 |
2/2✓ Branch 0 taken 4703 times.
✓ Branch 1 taken 80 times.
|
4783 | if (!line_start) |
178 | 4703 | av_bprint_chars(dst, *in, 1); | |
179 | 4783 | break; | |
180 | 128 | case '{': | |
181 | 128 | handle_open_brace(dst, &in, &an, &closing_brace_missing); | |
182 | 128 | break; | |
183 | 762 | case '<': | |
184 | /* | ||
185 | * "<<" are likely latin guillemets in ASCII or some kind of random | ||
186 | * style effect; see sub/badsyntax.srt in the FATE samples | ||
187 | * directory for real test cases. | ||
188 | */ | ||
189 | |||
190 | 762 | likely_a_tag = 1; | |
191 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 762 times.
|
789 | for (i = 0; in[1] == '<'; i++) { |
192 | 27 | av_bprint_chars(dst, '<', 1); | |
193 | 27 | likely_a_tag = 0; | |
194 | 27 | in++; | |
195 | } | ||
196 | |||
197 | 762 | tag_close = in[1] == '/'; | |
198 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 410 times.
|
762 | if (tag_close) |
199 | 352 | likely_a_tag = 1; | |
200 | |||
201 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 762 times.
|
762 | av_assert0(in[0] == '<'); |
202 | |||
203 | 762 | len = 0; | |
204 | |||
205 |
3/4✓ Branch 1 taken 738 times.
✓ Branch 2 taken 24 times.
✓ Branch 3 taken 738 times.
✗ Branch 4 not taken.
|
1500 | if (scantag(in+tag_close+1, buffer, &len) && len > 0) { |
206 | 738 | const int skip = len + tag_close; | |
207 | 738 | const char *tagname = buffer; | |
208 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 738 times.
|
743 | while (*tagname == ' ') { |
209 | 5 | likely_a_tag = 0; | |
210 | 5 | tagname++; | |
211 | } | ||
212 |
2/2✓ Branch 0 taken 178 times.
✓ Branch 1 taken 560 times.
|
738 | if ((param = strchr(tagname, ' '))) |
213 | 178 | *param++ = 0; | |
214 | |||
215 | /* Check if this is likely a tag */ | ||
216 | #define LIKELY_A_TAG_CHAR(x) (((x) >= '0' && (x) <= '9') || \ | ||
217 | ((x) >= 'a' && (x) <= 'z') || \ | ||
218 | ((x) >= 'A' && (x) <= 'Z') || \ | ||
219 | (x) == '_' || (x) == '/') | ||
220 |
2/2✓ Branch 0 taken 2730 times.
✓ Branch 1 taken 727 times.
|
3457 | for (i = 0; tagname[i]; i++) { |
221 |
15/16✓ Branch 0 taken 2718 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 2691 times.
✓ Branch 3 taken 27 times.
✓ Branch 4 taken 2591 times.
✓ Branch 5 taken 112 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2591 times.
✓ Branch 8 taken 99 times.
✓ Branch 9 taken 13 times.
✓ Branch 10 taken 88 times.
✓ Branch 11 taken 11 times.
✓ Branch 12 taken 13 times.
✓ Branch 13 taken 88 times.
✓ Branch 14 taken 11 times.
✓ Branch 15 taken 2 times.
|
2730 | if (!LIKELY_A_TAG_CHAR(tagname[i])) { |
222 | 11 | likely_a_tag = 0; | |
223 | 11 | break; | |
224 | } | ||
225 | } | ||
226 | |||
227 |
2/2✓ Branch 1 taken 310 times.
✓ Branch 2 taken 428 times.
|
738 | if (!av_strcasecmp(tagname, "font")) { |
228 |
3/4✓ Branch 0 taken 155 times.
✓ Branch 1 taken 155 times.
✓ Branch 2 taken 155 times.
✗ Branch 3 not taken.
|
465 | if (tag_close && sptr > 0) { |
229 | 155 | struct font_tag *cur_tag = &stack[sptr--]; | |
230 | 155 | struct font_tag *last_tag = &stack[sptr]; | |
231 | |||
232 |
2/2✓ Branch 0 taken 93 times.
✓ Branch 1 taken 62 times.
|
155 | if (cur_tag->size) { |
233 |
2/2✓ Branch 0 taken 77 times.
✓ Branch 1 taken 16 times.
|
93 | if (!last_tag->size) |
234 | 77 | av_bprintf(dst, "{\\fs}"); | |
235 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | else if (last_tag->size != cur_tag->size) |
236 | 8 | av_bprintf(dst, "{\\fs%d}", last_tag->size); | |
237 | } | ||
238 | |||
239 |
2/2✓ Branch 0 taken 121 times.
✓ Branch 1 taken 34 times.
|
155 | if (cur_tag->color & 0xff000000) { |
240 |
2/2✓ Branch 0 taken 88 times.
✓ Branch 1 taken 33 times.
|
121 | if (!(last_tag->color & 0xff000000)) |
241 | 88 | av_bprintf(dst, "{\\c}"); | |
242 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 24 times.
|
33 | else if (last_tag->color != cur_tag->color) |
243 | 9 | av_bprintf(dst, "{\\c&H%"PRIX32"&}", last_tag->color & 0xffffff); | |
244 | } | ||
245 | |||
246 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 147 times.
|
155 | if (cur_tag->face[0]) { |
247 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (!last_tag->face[0]) |
248 | 8 | av_bprintf(dst, "{\\fn}"); | |
249 | ✗ | else if (strcmp(last_tag->face, cur_tag->face)) | |
250 | ✗ | av_bprintf(dst, "{\\fn%s}", last_tag->face); | |
251 | } | ||
252 |
2/4✓ Branch 0 taken 155 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 155 times.
✗ Branch 3 not taken.
|
155 | } else if (!tag_close && sptr < FF_ARRAY_ELEMS(stack) - 1) { |
253 | 155 | struct font_tag *new_tag = &stack[sptr + 1]; | |
254 | |||
255 | 155 | *new_tag = stack[sptr++]; | |
256 | |||
257 |
2/2✓ Branch 0 taken 191 times.
✓ Branch 1 taken 155 times.
|
346 | while (param) { |
258 |
2/2✓ Branch 1 taken 85 times.
✓ Branch 2 taken 106 times.
|
191 | if (!av_strncasecmp(param, "size=", 5)) { |
259 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 53 times.
|
85 | param += 5 + (param[5] == '"'); |
260 |
1/2✓ Branch 0 taken 85 times.
✗ Branch 1 not taken.
|
85 | if (sscanf(param, "%u", &new_tag->size) == 1) |
261 | 85 | av_bprintf(dst, "{\\fs%u}", new_tag->size); | |
262 |
2/2✓ Branch 1 taken 97 times.
✓ Branch 2 taken 9 times.
|
106 | } else if (!av_strncasecmp(param, "color=", 6)) { |
263 | int color; | ||
264 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 31 times.
|
97 | param += 6 + (param[6] == '"'); |
265 | 97 | color = html_color_parse(log_ctx, param); | |
266 |
1/2✓ Branch 0 taken 97 times.
✗ Branch 1 not taken.
|
97 | if (color >= 0) { |
267 | 97 | new_tag->color = 0xff000000 | color; | |
268 | 97 | av_bprintf(dst, "{\\c&H%"PRIX32"&}", new_tag->color & 0xffffff); | |
269 | } | ||
270 |
2/2✓ Branch 1 taken 8 times.
✓ Branch 2 taken 1 times.
|
9 | } else if (!av_strncasecmp(param, "face=", 5)) { |
271 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | param += 5 + (param[5] == '"'); |
272 | 8 | len = strcspn(param, | |
273 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | param[-1] == '"' ? "\"" :" "); |
274 | 8 | av_strlcpy(new_tag->face, param, | |
275 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | FFMIN(sizeof(new_tag->face), len+1)); |
276 | 8 | param += len; | |
277 | 8 | av_bprintf(dst, "{\\fn%s}", new_tag->face); | |
278 | } | ||
279 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 155 times.
|
191 | if ((param = strchr(param, ' '))) |
280 | 36 | param++; | |
281 | } | ||
282 | } | ||
283 | 310 | in += skip; | |
284 |
6/6✓ Branch 0 taken 427 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 258 times.
✓ Branch 3 taken 169 times.
✓ Branch 4 taken 256 times.
✓ Branch 5 taken 2 times.
|
428 | } else if (tagname[0] && !tagname[1] && strchr("bisu", av_tolower(tagname[0]))) { |
285 | 256 | av_bprintf(dst, "{\\%c%d}", (char)av_tolower(tagname[0]), !tag_close); | |
286 | 256 | in += skip; | |
287 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 166 times.
|
172 | } else if (!av_strncasecmp(tagname, "br", 2) && |
288 |
5/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
|
6 | (!tagname[2] || (tagname[2] == '/' && !tagname[3]))) { |
289 | 4 | av_bprintf(dst, "\\N"); | |
290 | 4 | in += skip; | |
291 |
2/2✓ Branch 0 taken 151 times.
✓ Branch 1 taken 17 times.
|
168 | } else if (likely_a_tag) { |
292 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 70 times.
|
151 | if (!tag_close) // warn only once |
293 | 81 | av_log(log_ctx, AV_LOG_WARNING, "Unrecognized tag %s\n", tagname); | |
294 | 151 | in += skip; | |
295 | } else { | ||
296 | 17 | av_bprint_chars(dst, '<', 1); | |
297 | } | ||
298 | } else { | ||
299 | 24 | av_bprint_chars(dst, *in, 1); | |
300 | } | ||
301 | 762 | break; | |
302 | 27401 | default: | |
303 | 27401 | av_bprint_chars(dst, *in, 1); | |
304 | 27401 | break; | |
305 | } | ||
306 |
5/6✓ Branch 0 taken 28814 times.
✓ Branch 1 taken 4783 times.
✓ Branch 2 taken 28814 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 28291 times.
✓ Branch 5 taken 523 times.
|
33597 | if (*in != ' ' && *in != '\r' && *in != '\n') |
307 | 28291 | line_start = 0; | |
308 | } | ||
309 | |||
310 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 411 times.
|
411 | if (!av_bprint_is_complete(dst)) |
311 | ✗ | return AVERROR(ENOMEM); | |
312 | |||
313 |
3/4✓ Branch 0 taken 410 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 410 times.
|
411 | while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2)) |
314 | ✗ | dst->len -= 2; | |
315 | 411 | dst->str[dst->len] = 0; | |
316 | 411 | rstrip_spaces_buf(dst); | |
317 | |||
318 | 411 | return 0; | |
319 | } | ||
320 |