Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org> | ||
3 | * Copyright (c) 2017 Clément Bœsch <u@pkh.me> | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | #include "libavutil/avassert.h" | ||
23 | #include "libavutil/avstring.h" | ||
24 | #include "libavutil/common.h" | ||
25 | #include "libavutil/parseutils.h" | ||
26 | #include "htmlsubtitles.h" | ||
27 | #include <ctype.h> | ||
28 | |||
/*
 * Parse the value of an HTML color attribute.
 *
 * A leading run of '#' characters is collapsed so that parsing starts on the
 * last '#' of the run. The value ends at the first '"', ' ' or '>' (or at
 * the end of the string).
 *
 * @param log_ctx context handed to av_parse_color()
 * @param str     start of the attribute value
 * @return -1 if av_parse_color() fails; otherwise rgba[0] in bits 0-7,
 *         rgba[1] in bits 8-15 and rgba[2] in bits 16-23
 */
static int html_color_parse(void *log_ctx, const char *str)
{
    uint8_t rgba[4];
    const char *color = str;

    /* Step over every '#' that is followed by another '#'. */
    while (color[0] == '#' && color[1] == '#')
        color++;

    if (av_parse_color(rgba, color, strcspn(color, "\" >"), log_ctx) < 0)
        return -1;

    return rgba[0] | rgba[1] << 8 | rgba[2] << 16;
}
40 | |||
41 | 732 | static void rstrip_spaces_buf(AVBPrint *buf) | |
42 | { | ||
43 |
1/2✓ Branch 1 taken 732 times.
✗ Branch 2 not taken.
|
732 | if (av_bprint_is_complete(buf)) |
44 |
4/4✓ Branch 0 taken 869 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 138 times.
✓ Branch 3 taken 731 times.
|
870 | while (buf->len > 0 && buf->str[buf->len - 1] == ' ') |
45 | 138 | buf->str[--buf->len] = 0; | |
46 | 732 | } | |
47 | |||
/*
 * Fast check for an alignment override at the start of a string.
 * Functionally equivalent to this sscanf call:
 *
 *   sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0
 *
 * @param in  NUL-terminated text to inspect
 * @return 1 if in starts with "{\an", one ASCII digit and '}', else 0
 */
static int scanbraces(const char* in) {
    static const char prefix[] = "{\\an";

    /*
     * Each test only runs when the previous one succeeded, so in[4] and
     * in[5] are never read past the terminating NUL.
     */
    return strncmp(in, prefix, sizeof(prefix) - 1) == 0 &&
           in[4] >= '0' && in[4] <= '9' &&
           in[5] == '}';
}
66 | |||
67 | /* skip all {\xxx} substrings except for {\an%d} | ||
68 | and all microdvd like styles such as {Y:xxx} */ | ||
69 | 96 | static void handle_open_brace(AVBPrint *dst, const char **inp, int *an, int *closing_brace_missing) | |
70 | { | ||
71 | 96 | const char *in = *inp; | |
72 | |||
73 | 96 | *an += scanbraces(in); | |
74 | |||
75 |
1/2✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
|
96 | if (!*closing_brace_missing) { |
76 |
4/4✓ Branch 0 taken 24 times.
✓ Branch 1 taken 72 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 12 times.
|
96 | if ( (*an != 1 && in[1] == '\\') |
77 |
4/6✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 78 times.
✓ Branch 4 taken 6 times.
✗ Branch 5 not taken.
|
84 | || (in[1] && strchr("CcFfoPSsYy", in[1]) && in[2] == ':')) { |
78 | 18 | char *bracep = strchr(in+2, '}'); | |
79 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (bracep) { |
80 | 18 | *inp = bracep; | |
81 | 18 | return; | |
82 | } else | ||
83 | ✗ | *closing_brace_missing = 1; | |
84 | } | ||
85 | } | ||
86 | |||
87 | 78 | av_bprint_chars(dst, *in, 1); | |
88 | } | ||
89 | |||
/* State of one <font> nesting level. */
struct font_tag {
    /* Font face name; an empty string means no face is set. */
    char face[128];
    /* Font size; 0 means no size is set. */
    int size;
    /*
     * Color in the low 24 bits; the top byte is non-zero (0xff) when a color
     * is set and zero otherwise.
     */
    uint32_t color;
};
95 | |||
/*
 * Fast code for scanning the rest of a tag. Hand-rolled replacement for an
 * sscanf-based scan using the format "%127[^<>]>%n".
 *
 * Copies characters from in to buffer until a '>' is found.
 *
 * @param in      text following the '<' (and the '/' of a closing tag)
 * @param buffer  destination, at least 128 bytes; NUL-terminated on success
 * @param lenp    on success, receives the number of characters consumed,
 *                including the '>'; left untouched on failure
 * @return 1 if a '>' was found within the first 128 characters before any
 *         '<' or the end of the string, 0 otherwise
 */
static int scantag(const char* in, char* buffer, int* lenp) {
    int pos = 0;

    while (pos < 128) {
        const char ch = in[pos];

        if (ch == '\0' || ch == '<')
            return 0;

        if (ch == '>') {
            buffer[pos] = '\0';
            *lenp = pos + 1;
            return 1;
        }

        buffer[pos++] = ch;
    }

    return 0;
}
123 | |||
124 | /* | ||
125 | * The general politic of the convert is to mask unsupported tags or formatting | ||
126 | * errors (but still alert the user/subtitles writer with an error/warning) | ||
127 | * without dropping any actual text content for the final user. | ||
128 | */ | ||
129 | 337 | int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in) | |
130 | { | ||
131 | char *param, buffer[128]; | ||
132 | 337 | int len, tag_close, sptr = 0, line_start = 1, an = 0, end = 0; | |
133 | 337 | int closing_brace_missing = 0; | |
134 | int i, likely_a_tag; | ||
135 | |||
136 | /* | ||
137 | * state stack is only present for fonts since they are the only tags where | ||
138 | * the state is not binary. Here is a typical use case: | ||
139 | * | ||
140 | * <font color="red" size=10> | ||
141 | * red 10 | ||
142 | * <font size=50> RED AND BIG </font> | ||
143 | * red 10 again | ||
144 | * </font> | ||
145 | * | ||
146 | * On the other hand, using the state system for all the tags should be | ||
147 | * avoided because it breaks wrongly nested tags such as: | ||
148 | * | ||
149 | * <b> foo <i> bar </b> bla </i> | ||
150 | * | ||
151 | * We don't want to break here; instead, we will treat all these tags as | ||
152 | * binary state markers. Basically, "<b>" will activate bold, and "</b>" | ||
153 | * will deactivate it, whatever the current state. | ||
154 | * | ||
155 | * This will also prevents cases where we have a random closing tag | ||
156 | * remaining after the opening one was dropped. Yes, this happens and we | ||
157 | * still don't want to print a "</b>" at the end of the dialog event. | ||
158 | */ | ||
159 | struct font_tag stack[16]; | ||
160 | |||
161 | 337 | memset(&stack[0], 0, sizeof(stack[0])); | |
162 | |||
163 |
3/4✓ Branch 0 taken 27050 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 26713 times.
✓ Branch 3 taken 337 times.
|
27050 | for (; !end && *in; in++) { |
164 |
5/6✗ Branch 0 not taken.
✓ Branch 1 taken 395 times.
✓ Branch 2 taken 3767 times.
✓ Branch 3 taken 96 times.
✓ Branch 4 taken 626 times.
✓ Branch 5 taken 21829 times.
|
26713 | switch (*in) { |
165 | ✗ | case '\r': | |
166 | ✗ | break; | |
167 | 395 | case '\n': | |
168 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 395 times.
|
395 | if (line_start) { |
169 | ✗ | end = 1; | |
170 | ✗ | break; | |
171 | } | ||
172 | 395 | rstrip_spaces_buf(dst); | |
173 | 395 | av_bprintf(dst, "\\N"); | |
174 | 395 | line_start = 1; | |
175 | 395 | break; | |
176 | 3767 | case ' ': | |
177 |
2/2✓ Branch 0 taken 3707 times.
✓ Branch 1 taken 60 times.
|
3767 | if (!line_start) |
178 | 3707 | av_bprint_chars(dst, *in, 1); | |
179 | 3767 | break; | |
180 | 96 | case '{': | |
181 | 96 | handle_open_brace(dst, &in, &an, &closing_brace_missing); | |
182 | 96 | break; | |
183 | 626 | case '<': | |
184 | /* | ||
185 | * "<<" are likely latin guillemets in ASCII or some kind of random | ||
186 | * style effect; see sub/badsyntax.srt in the FATE samples | ||
187 | * directory for real test cases. | ||
188 | */ | ||
189 | |||
190 | 626 | likely_a_tag = 1; | |
191 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 626 times.
|
653 | for (i = 0; in[1] == '<'; i++) { |
192 | 27 | av_bprint_chars(dst, '<', 1); | |
193 | 27 | likely_a_tag = 0; | |
194 | 27 | in++; | |
195 | } | ||
196 | |||
197 | 626 | tag_close = in[1] == '/'; | |
198 |
2/2✓ Branch 0 taken 288 times.
✓ Branch 1 taken 338 times.
|
626 | if (tag_close) |
199 | 288 | likely_a_tag = 1; | |
200 | |||
201 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 626 times.
|
626 | av_assert0(in[0] == '<'); |
202 | |||
203 | 626 | len = 0; | |
204 | |||
205 |
3/4✓ Branch 1 taken 606 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 606 times.
✗ Branch 4 not taken.
|
1232 | if (scantag(in+tag_close+1, buffer, &len) && len > 0) { |
206 | 606 | const int skip = len + tag_close; | |
207 | 606 | const char *tagname = buffer; | |
208 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 606 times.
|
611 | while (*tagname == ' ') { |
209 | 5 | likely_a_tag = 0; | |
210 | 5 | tagname++; | |
211 | } | ||
212 |
2/2✓ Branch 0 taken 148 times.
✓ Branch 1 taken 458 times.
|
606 | if ((param = strchr(tagname, ' '))) |
213 | 148 | *param++ = 0; | |
214 | |||
215 | /* Check if this is likely a tag */ | ||
216 | #define LIKELY_A_TAG_CHAR(x) (((x) >= '0' && (x) <= '9') || \ | ||
217 | ((x) >= 'a' && (x) <= 'z') || \ | ||
218 | ((x) >= 'A' && (x) <= 'Z') || \ | ||
219 | (x) == '_' || (x) == '/') | ||
220 |
2/2✓ Branch 0 taken 2228 times.
✓ Branch 1 taken 597 times.
|
2825 | for (i = 0; tagname[i]; i++) { |
221 |
15/16✓ Branch 0 taken 2218 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 2193 times.
✓ Branch 3 taken 25 times.
✓ Branch 4 taken 2115 times.
✓ Branch 5 taken 88 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2115 times.
✓ Branch 8 taken 77 times.
✓ Branch 9 taken 11 times.
✓ Branch 10 taken 66 times.
✓ Branch 11 taken 11 times.
✓ Branch 12 taken 11 times.
✓ Branch 13 taken 66 times.
✓ Branch 14 taken 9 times.
✓ Branch 15 taken 2 times.
|
2228 | if (!LIKELY_A_TAG_CHAR(tagname[i])) { |
222 | 9 | likely_a_tag = 0; | |
223 | 9 | break; | |
224 | } | ||
225 | } | ||
226 | |||
227 |
2/2✓ Branch 1 taken 258 times.
✓ Branch 2 taken 348 times.
|
606 | if (!av_strcasecmp(tagname, "font")) { |
228 |
3/4✓ Branch 0 taken 129 times.
✓ Branch 1 taken 129 times.
✓ Branch 2 taken 129 times.
✗ Branch 3 not taken.
|
387 | if (tag_close && sptr > 0) { |
229 | 129 | struct font_tag *cur_tag = &stack[sptr--]; | |
230 | 129 | struct font_tag *last_tag = &stack[sptr]; | |
231 | |||
232 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 54 times.
|
129 | if (cur_tag->size) { |
233 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 12 times.
|
75 | if (!last_tag->size) |
234 | 63 | av_bprintf(dst, "{\\fs}"); | |
235 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | else if (last_tag->size != cur_tag->size) |
236 | 6 | av_bprintf(dst, "{\\fs%d}", last_tag->size); | |
237 | } | ||
238 | |||
239 |
2/2✓ Branch 0 taken 103 times.
✓ Branch 1 taken 26 times.
|
129 | if (cur_tag->color & 0xff000000) { |
240 |
2/2✓ Branch 0 taken 74 times.
✓ Branch 1 taken 29 times.
|
103 | if (!(last_tag->color & 0xff000000)) |
241 | 74 | av_bprintf(dst, "{\\c}"); | |
242 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 22 times.
|
29 | else if (last_tag->color != cur_tag->color) |
243 | 7 | av_bprintf(dst, "{\\c&H%"PRIX32"&}", last_tag->color & 0xffffff); | |
244 | } | ||
245 | |||
246 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 123 times.
|
129 | if (cur_tag->face[0]) { |
247 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (!last_tag->face[0]) |
248 | 6 | av_bprintf(dst, "{\\fn}"); | |
249 | ✗ | else if (strcmp(last_tag->face, cur_tag->face)) | |
250 | ✗ | av_bprintf(dst, "{\\fn%s}", last_tag->face); | |
251 | } | ||
252 |
2/4✓ Branch 0 taken 129 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 129 times.
✗ Branch 3 not taken.
|
129 | } else if (!tag_close && sptr < FF_ARRAY_ELEMS(stack) - 1) { |
253 | 129 | struct font_tag *new_tag = &stack[sptr + 1]; | |
254 | |||
255 | 129 | *new_tag = stack[sptr++]; | |
256 | |||
257 |
2/2✓ Branch 0 taken 157 times.
✓ Branch 1 taken 129 times.
|
286 | while (param) { |
258 |
2/2✓ Branch 1 taken 69 times.
✓ Branch 2 taken 88 times.
|
157 | if (!av_strncasecmp(param, "size=", 5)) { |
259 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 45 times.
|
69 | param += 5 + (param[5] == '"'); |
260 |
1/2✓ Branch 0 taken 69 times.
✗ Branch 1 not taken.
|
69 | if (sscanf(param, "%u", &new_tag->size) == 1) |
261 | 69 | av_bprintf(dst, "{\\fs%u}", new_tag->size); | |
262 |
2/2✓ Branch 1 taken 81 times.
✓ Branch 2 taken 7 times.
|
88 | } else if (!av_strncasecmp(param, "color=", 6)) { |
263 | int color; | ||
264 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 27 times.
|
81 | param += 6 + (param[6] == '"'); |
265 | 81 | color = html_color_parse(log_ctx, param); | |
266 |
1/2✓ Branch 0 taken 81 times.
✗ Branch 1 not taken.
|
81 | if (color >= 0) { |
267 | 81 | new_tag->color = 0xff000000 | color; | |
268 | 81 | av_bprintf(dst, "{\\c&H%"PRIX32"&}", new_tag->color & 0xffffff); | |
269 | } | ||
270 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 1 times.
|
7 | } else if (!av_strncasecmp(param, "face=", 5)) { |
271 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | param += 5 + (param[5] == '"'); |
272 | 6 | len = strcspn(param, | |
273 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | param[-1] == '"' ? "\"" :" "); |
274 | 6 | av_strlcpy(new_tag->face, param, | |
275 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | FFMIN(sizeof(new_tag->face), len+1)); |
276 | 6 | param += len; | |
277 | 6 | av_bprintf(dst, "{\\fn%s}", new_tag->face); | |
278 | } | ||
279 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 129 times.
|
157 | if ((param = strchr(param, ' '))) |
280 | 28 | param++; | |
281 | } | ||
282 | } | ||
283 | 258 | in += skip; | |
284 |
6/6✓ Branch 0 taken 347 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 202 times.
✓ Branch 3 taken 145 times.
✓ Branch 4 taken 200 times.
✓ Branch 5 taken 2 times.
|
348 | } else if (tagname[0] && !tagname[1] && strchr("bisu", av_tolower(tagname[0]))) { |
285 | 200 | av_bprintf(dst, "{\\%c%d}", (char)av_tolower(tagname[0]), !tag_close); | |
286 | 200 | in += skip; | |
287 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 142 times.
|
148 | } else if (!av_strncasecmp(tagname, "br", 2) && |
288 |
5/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
|
6 | (!tagname[2] || (tagname[2] == '/' && !tagname[3]))) { |
289 | 4 | av_bprintf(dst, "\\N"); | |
290 | 4 | in += skip; | |
291 |
2/2✓ Branch 0 taken 129 times.
✓ Branch 1 taken 15 times.
|
144 | } else if (likely_a_tag) { |
292 |
2/2✓ Branch 0 taken 69 times.
✓ Branch 1 taken 60 times.
|
129 | if (!tag_close) // warn only once |
293 | 69 | av_log(log_ctx, AV_LOG_WARNING, "Unrecognized tag %s\n", tagname); | |
294 | 129 | in += skip; | |
295 | } else { | ||
296 | 15 | av_bprint_chars(dst, '<', 1); | |
297 | } | ||
298 | } else { | ||
299 | 20 | av_bprint_chars(dst, *in, 1); | |
300 | } | ||
301 | 626 | break; | |
302 | 21829 | default: | |
303 | 21829 | av_bprint_chars(dst, *in, 1); | |
304 | 21829 | break; | |
305 | } | ||
306 |
5/6✓ Branch 0 taken 22946 times.
✓ Branch 1 taken 3767 times.
✓ Branch 2 taken 22946 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 22551 times.
✓ Branch 5 taken 395 times.
|
26713 | if (*in != ' ' && *in != '\r' && *in != '\n') |
307 | 22551 | line_start = 0; | |
308 | } | ||
309 | |||
310 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 337 times.
|
337 | if (!av_bprint_is_complete(dst)) |
311 | ✗ | return AVERROR(ENOMEM); | |
312 | |||
313 |
3/4✓ Branch 0 taken 336 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 336 times.
|
337 | while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2)) |
314 | ✗ | dst->len -= 2; | |
315 | 337 | dst->str[dst->len] = 0; | |
316 | 337 | rstrip_spaces_buf(dst); | |
317 | |||
318 | 337 | return 0; | |
319 | } | ||
320 |