Fawkes API Fawkes Development Version
yuvrgb.cpp
1
2/****************************************************************************
3 * yuvrgb.h - YUV to RGB conversion - specific methods, macros and constants
4 *
5 * Created: Sat Aug 12 15:02:41 2006
6 * based on colorspaces.h from Tue Feb 23 13:49:38 2005
7 * Copyright 2005-2006 Tim Niemueller [www.niemueller.de]
8 *
9 ****************************************************************************/
10
11/* This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version. A runtime exception applies to
15 * this software (see LICENSE.GPL_WRE file mentioned below for details).
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * Read the full text in the LICENSE.GPL_WRE file in the doc directory.
23 */
24
25#include <core/macros.h>
26#include <fvutils/color/yuvrgb.h>
27#include <fvutils/cpu/mmx.h>
28
29namespace firevision {
30
31/** YUV to RGB Conversion
32 * B = 1.164(Y - 16) + 2.018(U - 128)
33 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
34 * R = 1.164(Y - 16) + 1.596(V - 128)
35 *
36 * Values have to be clamped to keep them in the [0-255] range.
37 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
38 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
39 * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
40 * line
41 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
42 * (thus this is a 24bit RGB with one byte per color) line by line.
43 * @param width Width of the image contained in the YUV buffer
44 * @param height Height of the image contained in the YUV buffer
45 */
46void
47yuv411packed_to_rgb_plainc(const unsigned char *YUV,
48 unsigned char * RGB,
49 unsigned int width,
50 unsigned int height)
51{
52 int y0, y1, y2, y3, u, v;
53 unsigned int i = 0;
54 while (i < (width * height) * 3 / 2) {
55 u = YUV[i++] - 128;
56 y0 = YUV[i++] - 16;
57 y1 = YUV[i++] - 16;
58 v = YUV[i++] - 128;
59 y2 = YUV[i++] - 16;
60 y3 = YUV[i++] - 16;
61
62 // Set red, green and blue bytes for pixel 0
63 *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
64 *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
65 *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
66
67 // Set red, green and blue bytes for pixel 1
68 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
69 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
70 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
71
72 // Set red, green and blue bytes for pixel 2
73 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
74 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
75 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
76
77 // Set red, green and blue bytes for pixel 3
78 *RGB++ = clip((76284 * y3 + 104595 * v) >> 16);
79 *RGB++ = clip((76284 * y3 - 25625 * u - 53281 * v) >> 16);
80 *RGB++ = clip((76284 * y3 + 132252 * u) >> 16);
81 }
82}
83
84/** YUV to RGB Conversion
85 * B = 1.164(Y - 16) + 2.018(U - 128)
86 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
87 * R = 1.164(Y - 16) + 1.596(V - 128)
88 *
89 * Values have to be clamped to keep them in the [0-255] range.
90 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
91 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
92 * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
93 * line
94 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
95 * (thus this is a 24bit RGB with one byte per color) line by line.
96 * @param width Width of the image contained in the YUV buffer
97 * @param height Height of the image contained in the YUV buffer
98 */
99void
100yuv422planar_to_rgb_plainc(const unsigned char *planar,
101 unsigned char * RGB,
102 unsigned int width,
103 unsigned int height)
104{
105 short y1, y2, u, v;
106 const unsigned char *yp, *up, *vp;
107 unsigned int i;
108
109 yp = planar;
110 up = planar + (width * height);
111 vp = up + (width * height / 2);
112
113 for (i = 0; i < (width * height / 2); ++i) {
114 y1 = *yp++;
115 y2 = *yp++;
116 u = *up++;
117 v = *vp++;
118
119 y1 -= 16;
120 y2 -= 16;
121 u -= 128;
122 v -= 128;
123
124 // Set red, green and blue bytes for pixel 0
125 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
126 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
127 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
128
129 // Set red, green and blue bytes for pixel 1
130 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
131 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
132 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
133 }
134}
135
136/** YUV to RGB Conversion
137 * B = 1.164(Y - 16) + 2.018(U - 128)
138 * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
139 * R = 1.164(Y - 16) + 1.596(V - 128)
140 *
141 * Values have to be clamped to keep them in the [0-255] range.
142 * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
143 * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
144 * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after
145 * line
146 * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
147 * (thus this is a 24bit RGB with one byte per color) line by line.
148 * @param width Width of the image contained in the YUV buffer
149 * @param height Height of the image contained in the YUV buffer
150 */
151void
152yuv422packed_to_rgb_plainc(const unsigned char *YUV,
153 unsigned char * RGB,
154 const unsigned int width,
155 const unsigned int height)
156{
157 int y0, y1, u, v;
158 unsigned int i = 0;
159 for (unsigned int pixel = 0; pixel < (width * height); pixel += 2) {
160 u = YUV[i++] - 128;
161 y0 = YUV[i++] - 16;
162 v = YUV[i++] - 128;
163 y1 = YUV[i++] - 16;
164
165 // Set red, green and blue bytes for pixel 0
166 *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
167 *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
168 *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
169
170 // Set red, green and blue bytes for pixel 1
171 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
172 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
173 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
174 }
175}
176
177/** Convert YUV422 planar to BGR.
178 * Use formula in aforementioned function.
179 * @param YUV YUV422 planar buffer
180 * @param BGR BGR buffer
181 * @param width Width of the image contained in the YUV buffer
182 * @param height Height of the image contained in the YUV buffer
183 */
184void
185yuv422planar_to_bgr_plainc(const unsigned char *planar,
186 unsigned char * BGR,
187 unsigned int width,
188 unsigned int height)
189{
190 short y1, y2, u, v;
191 const unsigned char *yp, *up, *vp;
192 unsigned int i;
193
194 yp = planar;
195 up = planar + (width * height);
196 vp = up + (width * height / 2);
197
198 for (i = 0; i < (width * height / 2); ++i) {
199 y1 = *yp++;
200 y2 = *yp++;
201 u = *up++;
202 v = *vp++;
203
204 y1 -= 16;
205 y2 -= 16;
206 u -= 128;
207 v -= 128;
208
209 // Set red, green and blue bytes for pixel 0
210 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
211 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
212 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
213
214 // Set red, green and blue bytes for pixel 1
215 *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
216 *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
217 *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
218 }
219}
220
221void
222yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar,
223 unsigned char * RGB,
224 unsigned int width,
225 unsigned int height)
226{
227 short y1, y2, u, v;
228 const unsigned char *yp, *up, *vp;
229 unsigned int i;
230
231 yp = planar;
232 up = planar + (width * height);
233 vp = up + (width * height / 2);
234
235 for (i = 0; i < (width * height / 2); ++i) {
236 y1 = *yp++;
237 y2 = *yp++;
238 u = *up++;
239 v = *vp++;
240
241 y1 -= 16;
242 y2 -= 16;
243 u -= 128;
244 v -= 128;
245
246 // Set red, green and blue bytes for pixel 0
247 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
248 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
249 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
250 *RGB++ = 255;
251
252 // Set red, green and blue bytes for pixel 1
253 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
254 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
255 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
256 *RGB++ = 255;
257 }
258}
259
260void
261yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar,
262 unsigned char * BGR,
263 unsigned int width,
264 unsigned int height)
265{
266 short y1, y2, u, v;
267 const unsigned char *yp, *up, *vp;
268 unsigned int i;
269
270 yp = planar;
271 up = planar + (width * height);
272 vp = up + (width * height / 2);
273
274 for (i = 0; i < (width * height / 2); ++i) {
275 y1 = *yp++;
276 y2 = *yp++;
277 u = *up++;
278 v = *vp++;
279
280 y1 -= 16;
281 y2 -= 16;
282 u -= 128;
283 v -= 128;
284
285 // Set red, green and blue bytes for pixel 0
286 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
287 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
288 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
289 *BGR++ = 255;
290
291 // Set red, green and blue bytes for pixel 1
292 *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
293 *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
294 *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
295 *BGR++ = 255;
296 }
297}
298
299void
300yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV,
301 unsigned char * BGR,
302 unsigned int width,
303 unsigned int height)
304{
305 int y0, y1, u, v;
306 unsigned int i = 0;
307 while (i < (width * height * 2)) {
308 u = YUV[i++] - 128;
309 y0 = YUV[i++] - 16;
310 v = YUV[i++] - 128;
311 y1 = YUV[i++] - 16;
312
313 // Set red, green and blue bytes for pixel 0
314 *BGR++ = clip((76284 * y0 + 132252 * u) >> 16);
315 *BGR++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
316 *BGR++ = clip((76284 * y0 + 104595 * v) >> 16);
317 *BGR++ = 255;
318
319 // Set red, green and blue bytes for pixel 1
320 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
321 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
322 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
323 *BGR++ = 255;
324 }
325}
326
327#if (defined __i386__ || defined __386__ || defined __X86__ || defined _M_IX86 || defined i386)
328
329# define CRV 104595
330# define CBU 132251
331# define CGU 25624
332# define CGV 53280
333# define YMUL 76283
334# define OFF 32768
335# define BITRES 16
336
337/* calculation float resolution in bits */
338/* ie RES = 6 is 10.6 fixed point */
339/* RES = 8 is 8.8 fixed point */
340/* RES = 4 is 12.4 fixed point */
341/* NB: going above 6 will lead to overflow... :( */
342# define RES 6
343
344# define RZ(i) (i >> (BITRES - RES))
345# define FOUR(i) \
346 { \
347 i, i, i, i \
348 }
349
350__aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
351__aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
352__aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
353__aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
354__aligned(8) const volatile unsigned short _const_ymul[4] = FOUR(RZ(YMUL));
355__aligned(8) const volatile unsigned short _const_128[4] = FOUR(128);
356__aligned(8) const volatile unsigned short _const_32[4] = FOUR(RZ(OFF));
357__aligned(8) const volatile unsigned short _const_16[4] = FOUR(16);
358
359# define CONST_CRVCRV *_const_crvcrv
360# define CONST_CBUCBU *_const_cbucbu
361# define CONST_CGUCGU *_const_cgucgu
362# define CONST_CGVCGV *_const_cgvcgv
363# define CONST_YMUL *_const_ymul
364# define CONST_128 *_const_128
365# define CONST_32 *_const_32
366# define CONST_16 *_const_16
367
368void
369yuv411planar_to_rgb_mmx(const unsigned char *yuv,
370 unsigned char * rgb,
371 unsigned int w,
372 unsigned int h)
373{
374 unsigned int xx, yy;
375 const unsigned char *yp1, *up, *vp;
376 unsigned char * dp1;
377
378 /* plane pointers */
379 yp1 = yuv;
380 up = yuv + (w * h);
381 vp = up + (w * (h / 4));
382 /* destination pointers */
383 dp1 = rgb;
384
385 yp1 = yuv;
386 up = yuv + (w * h);
387 vp = up + ((w / 2) * (h / 2));
388 dp1 = rgb;
389 for (yy = 0; yy < h; yy++) {
390 for (xx = 0; xx < w; xx += 8) {
391 movq_m2r(*yp1, mm0);
392 movq_r2r(mm0, mm1);
393 psrlw_i2r(8, mm0);
394 psllw_i2r(8, mm1);
395 psrlw_i2r(8, mm1);
396
397 pxor_r2r(mm7, mm7);
398 movd_m2r(*up, mm3);
399 movd_m2r(*vp, mm2);
400
401 punpcklbw_r2r(mm7, mm2);
402 punpcklbw_r2r(mm7, mm3);
403
404 movq_m2r(CONST_16, mm4);
405 psubsw_r2r(mm4, mm0);
406 psubsw_r2r(mm4, mm1);
407
408 movq_m2r(CONST_128, mm5);
409 psubsw_r2r(mm5, mm2);
410 psubsw_r2r(mm5, mm3);
411
412 movq_m2r(CONST_YMUL, mm4);
413 pmullw_r2r(mm4, mm0);
414 pmullw_r2r(mm4, mm1);
415
416 movq_m2r(CONST_CRVCRV, mm7);
417 pmullw_r2r(mm3, mm7);
418
419 movq_m2r(CONST_CBUCBU, mm6);
420 pmullw_r2r(mm2, mm6);
421
422 movq_m2r(CONST_CGUCGU, mm5);
423 pmullw_r2r(mm2, mm5);
424
425 movq_m2r(CONST_CGVCGV, mm4);
426 pmullw_r2r(mm3, mm4);
427
428 movq_r2r(mm0, mm2);
429 paddsw_r2r(mm7, mm2);
430 paddsw_r2r(mm1, mm7);
431
432 psraw_i2r(RES, mm2);
433 psraw_i2r(RES, mm7);
434 packuswb_r2r(mm7, mm2);
435
436 pxor_r2r(mm7, mm7);
437 movq_r2r(mm2, mm3);
438 punpckhbw_r2r(mm7, mm2);
439 punpcklbw_r2r(mm3, mm7);
440 por_r2r(mm7, mm2);
441
442 movq_r2r(mm0, mm3);
443 psubsw_r2r(mm5, mm3);
444 psubsw_r2r(mm4, mm3);
445 paddsw_m2r(CONST_32, mm3);
446
447 movq_r2r(mm1, mm7);
448 psubsw_r2r(mm5, mm7);
449 psubsw_r2r(mm4, mm7);
450 paddsw_m2r(CONST_32, mm7);
451
452 psraw_i2r(RES, mm3);
453 psraw_i2r(RES, mm7);
454 packuswb_r2r(mm7, mm3);
455
456 pxor_r2r(mm7, mm7);
457 movq_r2r(mm3, mm4);
458 punpckhbw_r2r(mm7, mm3);
459 punpcklbw_r2r(mm4, mm7);
460 por_r2r(mm7, mm3);
461
462 movq_m2r(CONST_32, mm4);
463 paddsw_r2r(mm6, mm0);
464 paddsw_r2r(mm6, mm1);
465 paddsw_r2r(mm4, mm0);
466 paddsw_r2r(mm4, mm1);
467 psraw_i2r(RES, mm0);
468 psraw_i2r(RES, mm1);
469 packuswb_r2r(mm1, mm0);
470
471 pxor_r2r(mm7, mm7);
472 movq_r2r(mm0, mm5);
473 punpckhbw_r2r(mm7, mm0);
474 punpcklbw_r2r(mm5, mm7);
475 por_r2r(mm7, mm0);
476
477 pxor_r2r(mm1, mm1);
478 movq_r2r(mm0, mm5);
479 movq_r2r(mm3, mm6);
480 movq_r2r(mm2, mm7);
481 punpckhbw_r2r(mm3, mm2);
482 punpcklbw_r2r(mm6, mm7);
483 punpckhbw_r2r(mm1, mm0);
484 punpcklbw_r2r(mm1, mm5);
485
486 movq_r2r(mm7, mm1);
487 punpckhwd_r2r(mm5, mm7);
488 punpcklwd_r2r(mm5, mm1);
489
490 movq_r2r(mm2, mm4);
491 punpckhwd_r2r(mm0, mm2);
492 punpcklwd_r2r(mm0, mm4);
493
494 movntq_r2m(mm1, *(dp1));
495 movntq_r2m(mm7, *(dp1 + 8));
496 movntq_r2m(mm4, *(dp1 + 16));
497 movntq_r2m(mm2, *(dp1 + 24));
498
499 yp1 += 8;
500 up += 4;
501 vp += 4;
502 dp1 += 8 * 4;
503 }
504 if (yy & 0x1) {
505 up -= w / 2;
506 vp -= w / 2;
507 }
508 }
509 emms();
510}
511#endif
512
513} // end namespace firevision