Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Texture block decompression | ||
3 | * Copyright (C) 2009 Benjamin Dobell, Glass Echidna | ||
4 | * Copyright (C) 2012 Matthäus G. "Anteru" Chajdas (http://anteru.net) | ||
5 | * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com> | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
8 | * copy of this software and associated documentation files (the "Software"), | ||
9 | * to deal in the Software without restriction, including without limitation | ||
10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
11 | * and/or sell copies of the Software, and to permit persons to whom the | ||
12 | * Software is furnished to do so, subject to the following conditions: | ||
13 | * The above copyright notice and this permission notice shall be included | ||
14 | * in all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
22 | * IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <stddef.h> | ||
26 | #include <stdint.h> | ||
27 | |||
28 | #include "libavutil/attributes.h" | ||
29 | #include "libavutil/common.h" | ||
30 | #include "libavutil/intreadwrite.h" | ||
31 | #include "libavutil/libm.h" | ||
32 | |||
33 | #include "texturedsp.h" | ||
34 | |||
/*
 * Pack four components into one 32-bit word: r occupies bits 0-7, g bits
 * 8-15, b bits 16-23 and a bits 24-31. Every argument is truncated to
 * uint8_t first. The alpha term is widened to unsigned before the shift so
 * that bit 31 is never produced by shifting a signed int.
 */
#define RGBA(r, g, b, a) (((unsigned)(uint8_t)(a) << 24) | \
                          ((uint8_t)(b) << 16)           | \
                          ((uint8_t)(g) <<  8)           | \
                          ((uint8_t)(r) <<  0))
39 | |||
40 | 1135742 | static av_always_inline void extract_color(uint32_t colors[4], | |
41 | uint16_t color0, | ||
42 | uint16_t color1, | ||
43 | int dxtn, int alpha) | ||
44 | { | ||
45 | int tmp; | ||
46 | uint8_t r0, g0, b0, r1, g1, b1; | ||
47 |
2/2✓ Branch 0 taken 586110 times.
✓ Branch 1 taken 549632 times.
|
1135742 | uint8_t a = dxtn ? 0 : 255; |
48 | |||
49 | 1135742 | tmp = (color0 >> 11) * 255 + 16; | |
50 | 1135742 | r0 = (uint8_t) ((tmp / 32 + tmp) / 32); | |
51 | 1135742 | tmp = ((color0 & 0x07E0) >> 5) * 255 + 32; | |
52 | 1135742 | g0 = (uint8_t) ((tmp / 64 + tmp) / 64); | |
53 | 1135742 | tmp = (color0 & 0x001F) * 255 + 16; | |
54 | 1135742 | b0 = (uint8_t) ((tmp / 32 + tmp) / 32); | |
55 | |||
56 | 1135742 | tmp = (color1 >> 11) * 255 + 16; | |
57 | 1135742 | r1 = (uint8_t) ((tmp / 32 + tmp) / 32); | |
58 | 1135742 | tmp = ((color1 & 0x07E0) >> 5) * 255 + 32; | |
59 | 1135742 | g1 = (uint8_t) ((tmp / 64 + tmp) / 64); | |
60 | 1135742 | tmp = (color1 & 0x001F) * 255 + 16; | |
61 | 1135742 | b1 = (uint8_t) ((tmp / 32 + tmp) / 32); | |
62 | |||
63 |
4/4✓ Branch 0 taken 549632 times.
✓ Branch 1 taken 586110 times.
✓ Branch 2 taken 402770 times.
✓ Branch 3 taken 146862 times.
|
1135742 | if (dxtn || color0 > color1) { |
64 | 988880 | colors[0] = RGBA(r0, g0, b0, a); | |
65 | 988880 | colors[1] = RGBA(r1, g1, b1, a); | |
66 | 988880 | colors[2] = RGBA((2 * r0 + r1) / 3, | |
67 | (2 * g0 + g1) / 3, | ||
68 | (2 * b0 + b1) / 3, | ||
69 | a); | ||
70 | 988880 | colors[3] = RGBA((2 * r1 + r0) / 3, | |
71 | (2 * g1 + g0) / 3, | ||
72 | (2 * b1 + b0) / 3, | ||
73 | a); | ||
74 | } else { | ||
75 | 146862 | colors[0] = RGBA(r0, g0, b0, a); | |
76 | 146862 | colors[1] = RGBA(r1, g1, b1, a); | |
77 | 146862 | colors[2] = RGBA((r0 + r1) / 2, | |
78 | (g0 + g1) / 2, | ||
79 | (b0 + b1) / 2, | ||
80 | a); | ||
81 | 146862 | colors[3] = RGBA(0, 0, 0, alpha); | |
82 | } | ||
83 | 1135742 | } | |
84 | |||
/**
 * Decode one 8-byte DXT1 block into a 4x4 tile of 32-bit pixels.
 *
 * @param dst    top-left pixel of the destination tile.
 * @param stride distance in bytes between two destination rows.
 * @param block  compressed block: two little-endian RGB565 endpoints
 *               followed by 32 bits of 2-bit palette selectors.
 * @param alpha  alpha byte given to palette entry 3 of three-colour blocks.
 */
static inline void dxt1_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block, uint8_t alpha)
{
    uint32_t palette[4];
    uint32_t selectors = AV_RL32(block + 4);
    int row, col;

    extract_color(palette, AV_RL16(block + 0), AV_RL16(block + 2), 0, alpha);

    /* Selectors are consumed two bits at a time, row by row. */
    for (row = 0; row < 4; row++, dst += stride) {
        for (col = 0; col < 4; col++, selectors >>= 2)
            AV_WL32(dst + 4 * col, palette[selectors & 3]);
    }
}
105 | |||
/**
 * Decompress one block of a DXT1 texture and store the resulting
 * RGBA pixels in 'dst'. Alpha component is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* Palette entry 3 of three-colour blocks stays opaque in this variant. */
    const uint8_t opaque = 255;

    dxt1_block_internal(dst, stride, block, opaque);

    return 8;
}
121 | |||
/**
 * Decompress one block of a DXT1 texture with 1-bit alpha and store
 * the resulting RGBA pixels in 'dst'. Alpha is either fully opaque or
 * fully transparent.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt1a_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* Palette entry 3 of three-colour blocks becomes transparent black. */
    const uint8_t transparent = 0;

    dxt1_block_internal(dst, stride, block, transparent);

    return 8;
}
138 | |||
/**
 * Decode one 16-byte DXT3 block into a 4x4 tile of RGBA pixels.
 *
 * Bytes 0-7 hold one little-endian 16-bit word per row containing four
 * 4-bit alpha values; bytes 8-15 hold the colour data (two RGB565
 * endpoints and 32 bits of 2-bit selectors).
 *
 * @param dst    top-left pixel of the destination tile.
 * @param stride distance in bytes between two destination rows.
 * @param block  compressed block.
 */
static inline void dxt3_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block)
{
    uint32_t palette[4];
    uint32_t selectors = AV_RL32(block + 12);
    int row, col;

    /* Palette entries come back with a zero alpha byte, so the explicit
     * alpha can simply be ORed in below. */
    extract_color(palette, AV_RL16(block + 8), AV_RL16(block + 10), 1, 0);

    for (row = 0; row < 4; row++, dst += stride) {
        unsigned alpha_bits = AV_RL16(block + 2 * row);

        for (col = 0; col < 4; col++) {
            /* Multiplying by 17 maps the 4-bit range 0..15 onto 0..255. */
            const unsigned alpha = (alpha_bits & 0x0F) * 17;

            AV_WL32(dst + 4 * col, palette[selectors & 3] | (alpha << 24));

            alpha_bits >>= 4;
            selectors  >>= 2;
        }
    }
}
169 | |||
170 | /** Convert a premultiplied alpha pixel to a straight alpha pixel. */ | ||
171 | 16384 | static av_always_inline void premult2straight(uint8_t *src) | |
172 | { | ||
173 | 16384 | int r = src[0]; | |
174 | 16384 | int g = src[1]; | |
175 | 16384 | int b = src[2]; | |
176 | 16384 | int a = src[3]; /* unchanged */ | |
177 | |||
178 | 16384 | src[0] = (uint8_t) r * a / 255; | |
179 | 16384 | src[1] = (uint8_t) g * a / 255; | |
180 | 16384 | src[2] = (uint8_t) b * a / 255; | |
181 | 16384 | } | |
182 | |||
/**
 * Decompress one block of a DXT2 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt2_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    dxt3_block_internal(dst, stride, block);

    /* DXT2 is DXT3 with premultiplied alpha. Every decoded pixel is passed
     * through premult2straight() because straight alpha is what lavc
     * outputs (and swscale expects). */
    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++)
            premult2straight(line + 4 * col);
    }

    return 16;
}
206 | |||
/**
 * Decompress one block of a DXT3 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt3_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* Bytes consumed: 8 of explicit alpha followed by 8 of colour data. */
    const int consumed = 16;

    dxt3_block_internal(dst, stride, block);

    return consumed;
}
222 | |||
/**
 * Decompress a BC 16x3 index block stored as
 *   h g f e
 *   d c b a
 *   p o n m
 *   l k j i
 *
 * Bits packed as
 *  | h | g | f | e | d | c | b | a | // Entry
 *  |765 432 107 654 321 076 543 210| // Bit
 *  |0000000000111111111112222222222| // Byte
 *
 * into 16 8-bit indices.
 *
 * @param dst receives 16 indices, each in the range 0..7.
 * @param src 6 bytes of packed 3-bit indices.
 */
static void decompress_indices(uint8_t *dst, const uint8_t *src)
{
    int half;

    /* The 48 input bits are handled as two little-endian 24-bit groups,
     * each of which yields eight 3-bit indices. */
    for (half = 0; half < 2; half++) {
        unsigned bits = AV_RL24(src + 3 * half);
        uint8_t *out  = dst + 8 * half;
        int n;

        for (n = 0; n < 8; n++, bits >>= 3)
            out[n] = bits & 0x7;
    }
}
252 | |||
/**
 * Decode one 16-byte DXT5 block into a 4x4 tile of RGBA pixels.
 *
 * Bytes 0-1 are the two alpha endpoints, bytes 2-7 hold sixteen 3-bit
 * alpha selectors, bytes 8-15 hold the colour data (two RGB565 endpoints
 * and 32 bits of 2-bit selectors).
 *
 * @param dst    top-left pixel of the destination tile.
 * @param stride distance in bytes between two destination rows.
 * @param block  compressed block.
 */
static inline void dxt5_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block)
{
    const int a0 = block[0];
    const int a1 = block[1];
    uint8_t  alpha_lut[8];
    uint8_t  alpha_sel[16];
    uint32_t palette[4];
    uint32_t selectors = AV_RL32(block + 12);
    int i, row, col;

    /* Resolve every possible 3-bit alpha selector once per block. */
    alpha_lut[0] = a0;
    alpha_lut[1] = a1;
    if (a0 > a1) {
        /* Six values interpolated between the endpoints. */
        for (i = 2; i < 8; i++)
            alpha_lut[i] = (uint8_t) (((8 - i) * a0 + (i - 1) * a1) / 7);
    } else {
        /* Four interpolated values plus the two range extremes. */
        for (i = 2; i < 6; i++)
            alpha_lut[i] = (uint8_t) (((6 - i) * a0 + (i - 1) * a1) / 5);
        alpha_lut[6] = 0;
        alpha_lut[7] = 255;
    }

    decompress_indices(alpha_sel, block + 2);

    /* Palette entries come back with a zero alpha byte, so the decoded
     * alpha can simply be ORed in below. */
    extract_color(palette, AV_RL16(block + 8), AV_RL16(block + 10), 1, 0);

    for (row = 0; row < 4; row++, dst += stride) {
        for (col = 0; col < 4; col++) {
            const unsigned alpha = alpha_lut[alpha_sel[4 * row + col]];

            AV_WL32(dst + 4 * col, palette[selectors & 3] | (alpha << 24));
            selectors >>= 2;
        }
    }
}
301 | |||
/**
 * Decompress one block of a DXT4 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt4_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    dxt5_block_internal(dst, stride, block);

    /* DXT4 is DXT5 with premultiplied alpha. Every decoded pixel is passed
     * through premult2straight() because straight alpha is what lavc
     * outputs (and swscale expects). */
    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++)
            premult2straight(line + 4 * col);
    }

    return 16;
}
325 | |||
/**
 * Decompress one block of a DXT5 texture and store the resulting
 * RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* Bytes consumed: 8 of interpolated alpha followed by 8 of colour data. */
    const int consumed = 16;

    dxt5_block_internal(dst, stride, block);

    return consumed;
}
341 | |||
342 | /** | ||
343 | * Convert a YCoCg buffer to RGBA. | ||
344 | * | ||
345 | * @param src input buffer. | ||
346 | * @param scaled variant with scaled chroma components and opaque alpha. | ||
347 | */ | ||
348 | 355328 | static av_always_inline void ycocg2rgba(uint8_t *src, int scaled) | |
349 | { | ||
350 | 355328 | int r = src[0]; | |
351 | 355328 | int g = src[1]; | |
352 | 355328 | int b = src[2]; | |
353 | 355328 | int a = src[3]; | |
354 | |||
355 |
2/2✓ Branch 0 taken 347136 times.
✓ Branch 1 taken 8192 times.
|
355328 | int s = scaled ? (b >> 3) + 1 : 1; |
356 | 355328 | int y = a; | |
357 | 355328 | int co = (r - 128) / s; | |
358 | 355328 | int cg = (g - 128) / s; | |
359 | |||
360 | 355328 | src[0] = av_clip_uint8(y + co - cg); | |
361 | 355328 | src[1] = av_clip_uint8(y + cg); | |
362 | 355328 | src[2] = av_clip_uint8(y - co - cg); | |
363 |
2/2✓ Branch 0 taken 8192 times.
✓ Branch 1 taken 347136 times.
|
355328 | src[3] = scaled ? 255 : b; |
364 | 355328 | } | |
365 | |||
/**
 * Decompress one block of a DXT5 texture with classic YCoCg and store
 * the resulting RGBA pixels in 'dst'. The output alpha is the value that
 * the DXT5 decode placed in the blue channel.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5y_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    /* The payload is ordinary DXT5 with luma carried in the alpha channel:
     * decode it normally, then convert each pixel in place. */
    dxt5_block_internal(dst, stride, block);

    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++)
            ycocg2rgba(line + 4 * col, 0);
    }

    return 16;
}
389 | |||
/**
 * Decompress one block of a DXT5 texture with scaled YCoCg and store
 * the resulting RGBA pixels in 'dst'. Alpha component is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    /* The payload is ordinary DXT5 with luma carried in the alpha channel:
     * decode it normally, then convert each pixel in place. */
    dxt5_block_internal(dst, stride, block);

    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++)
            ycocg2rgba(line + 4 * col, 1);
    }

    return 16;
}
413 | |||
/**
 * Expand the sixteen 3-bit selectors of an RGTC channel block through a
 * precomputed 8-entry value table and write the 4x4 result.
 *
 * @param dst       top-left pixel of the destination tile.
 * @param stride    distance in bytes between two destination rows.
 * @param block     compressed block; the selectors start at byte 2.
 * @param color_tab eight values, indexed by selector.
 * @param mono      non-zero: write a single byte per pixel at 'offset';
 *                  zero: write a 32-bit pixel with the value replicated in
 *                  R, G and B and alpha set to 255.
 * @param offset    byte offset inside the pixel (used only when mono).
 * @param pix_size  distance in bytes between two horizontal pixels.
 */
static inline void rgtc_block_internal(uint8_t *dst, ptrdiff_t stride,
                                       const uint8_t *block,
                                       const int *color_tab, int mono, int offset, int pix_size)
{
    uint8_t selectors[16];
    int row, col;

    decompress_indices(selectors, block + 2);

    /* Only one or two channels are stored at most, since it only used to
     * compress specular (black and white) or normal (red and green) maps.
     * Although the standard says to zero out unused components, many
     * implementations fill all of them with the same value. */
    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++) {
            /* Interval expansion from [-1 1] or [0 1] to [0 255]. */
            const int value = color_tab[selectors[4 * row + col]];
            uint8_t *px     = line + col * pix_size;

            if (!mono) {
                AV_WL32(px, RGBA(value, value, value, 255U));
                continue;
            }
            px[offset] = (uint8_t) value;
        }
    }
}
443 | |||
/**
 * Decode one 8-byte RGTC1 channel block.
 *
 * The two endpoint bytes are turned into an 8-entry value table which is
 * then handed, together with the output layout parameters, to
 * rgtc_block_internal().
 *
 * @param dst      top-left pixel of the destination tile.
 * @param stride   distance in bytes between two destination rows.
 * @param block    compressed block: two endpoints, then 48 selector bits.
 * @param sign     non-zero when the endpoints are signed bytes.
 * @param mono     forwarded to rgtc_block_internal().
 * @param offset   forwarded to rgtc_block_internal().
 * @param pix_size forwarded to rgtc_block_internal().
 */
static inline void rgtc1_block_internal(uint8_t *dst, ptrdiff_t stride,
                                        const uint8_t *block, int sign, int mono, int offset, int pix_size)
{
    int table[8];
    int i;

    /* signed data is in [-128 127] so just offset it to unsigned
     * and it can be treated exactly the same */
    table[0] = sign ? ((int8_t) block[0]) + 128 : block[0];
    table[1] = sign ? ((int8_t) block[1]) + 128 : block[1];

    if (table[0] > table[1]) {
        /* 6 interpolated values for bit codes 010..111 */
        for (i = 2; i < 8; i++)
            table[i] = ((8 - i) * table[0] + (i - 1) * table[1]) / 7;
    } else {
        /* 4 interpolated values for bit codes 010..101 */
        for (i = 2; i < 6; i++)
            table[i] = ((6 - i) * table[0] + (i - 1) * table[1]) / 5;
        table[6] = 0;   /* min range, bit code 110 */
        table[7] = 255; /* max range, bit code 111 */
    }

    rgtc_block_internal(dst, stride, block, table, mono, offset, pix_size);
}
483 | |||
/**
 * Decompress one block of a RGTC1 texture with signed components
 * and store the resulting RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* signed endpoints, full 4-byte RGBA pixels */
    const int sign = 1, mono = 0, offset = 0, pix_size = 4;

    rgtc1_block_internal(dst, stride, block, sign, mono, offset, pix_size);

    return 8;
}
499 | |||
/**
 * Decompress one block of a RGTC1 texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* unsigned endpoints, full 4-byte RGBA pixels */
    const int sign = 0, mono = 0, offset = 0, pix_size = 4;

    rgtc1_block_internal(dst, stride, block, sign, mono, offset, pix_size);

    return 8;
}
515 | |||
/**
 * Decompress one block of a RGTC1 texture with unsigned components
 * and overwrite the alpha component in 'dst' (RGBA data).
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1u_alpha_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* unsigned endpoints; only byte 3 of each 4-byte pixel is written */
    const int sign = 0, mono = 1, offset = 3, pix_size = 4;

    rgtc1_block_internal(dst, stride, block, sign, mono, offset, pix_size);

    return 8;
}
531 | |||
/**
 * Decompress one block of a RGTC1 texture with unsigned components
 * to Gray 8.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc1u_gray_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* unsigned endpoints; one byte per output pixel */
    const int sign = 0, mono = 1, offset = 0, pix_size = 1;

    rgtc1_block_internal(dst, stride, block, sign, mono, offset, pix_size);

    return 8;
}
547 | |||
/**
 * Decode one 16-byte RGTC2 block (two consecutive RGTC1 channel blocks)
 * into a 4x4 tile of RGBA pixels.
 *
 * The first channel becomes R, the second G; B is derived from R and G
 * and A is set to 255.
 *
 * @param dst    top-left pixel of the destination tile.
 * @param stride distance in bytes between two destination rows.
 * @param block  compressed block.
 * @param sign   non-zero when the channel endpoints are signed bytes.
 */
static inline void rgtc2_block_internal(uint8_t *dst, ptrdiff_t stride,
                                        const uint8_t *block, int sign)
{
    /* Two scratch 4x4 tiles of 4-byte pixels, one per decoded channel. */
    uint8_t chan_r[4 * 4 * 4];
    uint8_t chan_g[4 * 4 * 4];
    const ptrdiff_t tmp_stride = 16;
    int row, col;

    /* Decompress the two channels separately and interleave them afterwards. */
    rgtc1_block_internal(chan_r, tmp_stride, block,     sign, 0, 0, 4);
    rgtc1_block_internal(chan_g, tmp_stride, block + 8, sign, 0, 0, 4);

    /* B is rebuilt exactly like a normal map. */
    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++) {
            uint8_t *px  = line + 4 * col;
            const int r  = chan_r[4 * col + 16 * row];
            const int g  = chan_g[4 * col + 16 * row];
            const int d  = (255 * 255 - r * r - g * g) / 2;
            int b        = 127; /* used when d is not positive */

            if (d > 0)
                b = lrint(sqrtf(d));

            px[0] = r;
            px[1] = g;
            px[2] = b;
            px[3] = 255;
        }
    }
}
579 | |||
/**
 * Decompress one block of a RGTC2 texture with signed components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc2s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int sign = 1; /* endpoints are signed bytes */

    rgtc2_block_internal(dst, stride, block, sign);

    return 16;
}
595 | |||
/**
 * Decompress one block of a RGTC2 texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int rgtc2u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    const int sign = 0; /* endpoints are unsigned bytes */

    rgtc2_block_internal(dst, stride, block, sign);

    return 16;
}
611 | |||
/**
 * Decompress one block of a 3Dc texture with unsigned components
 * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
 *
 * @param dst    output buffer.
 * @param stride scanline in bytes.
 * @param block  block to decompress.
 * @return how much texture data has been consumed.
 */
static int dxn3dc_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int row, col;

    rgtc2_block_internal(dst, stride, block, 0);

    /* This is the 3Dc variant of RGTC2, with swapped R and G. */
    for (row = 0; row < 4; row++) {
        uint8_t *line = dst + row * stride;

        for (col = 0; col < 4; col++) {
            uint8_t *px      = line + 4 * col;
            const uint8_t r  = px[0];

            px[0] = px[1];
            px[1] = r;
        }
    }

    return 16;
}
636 | |||
637 | 156 | av_cold void ff_texturedsp_init(TextureDSPContext *c) | |
638 | { | ||
639 | 156 | c->dxt1_block = dxt1_block; | |
640 | 156 | c->dxt1a_block = dxt1a_block; | |
641 | 156 | c->dxt2_block = dxt2_block; | |
642 | 156 | c->dxt3_block = dxt3_block; | |
643 | 156 | c->dxt4_block = dxt4_block; | |
644 | 156 | c->dxt5_block = dxt5_block; | |
645 | 156 | c->dxt5y_block = dxt5y_block; | |
646 | 156 | c->dxt5ys_block = dxt5ys_block; | |
647 | 156 | c->rgtc1s_block = rgtc1s_block; | |
648 | 156 | c->rgtc1u_block = rgtc1u_block; | |
649 | 156 | c->rgtc1u_gray_block = rgtc1u_gray_block; | |
650 | 156 | c->rgtc1u_alpha_block = rgtc1u_alpha_block; | |
651 | 156 | c->rgtc2s_block = rgtc2s_block; | |
652 | 156 | c->rgtc2u_block = rgtc2u_block; | |
653 | 156 | c->dxn3dc_block = dxn3dc_block; | |
654 | 156 | } | |
655 | |||
656 | #define TEXTUREDSP_FUNC_NAME ff_texturedsp_decompress_thread | ||
657 | #define TEXTUREDSP_TEX_FUNC(a, b, c) tex_funct(a, b, c) | ||
658 | #include "texturedsp_template.c" | ||
659 |