SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
coder_fibonacci.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_FIBONACCI_INCLUDED
9#define SDSL_CODER_FIBONACCI_INCLUDED
10
11#include <sdsl/int_vector.hpp>
12
13namespace sdsl
14{
15
16namespace coder
17{
18
20template <typename T = void>
22{
23 public:
24 static struct impl
25 {
26 uint64_t fib12bit_to_bin[(1 << 12) * 8];
28
33 uint8_t fib2bin_shift[(1 << 13)];
35
40 uint16_t fib2bin_16_greedy[(1 << 16)];
41
43 uint64_t fib2bin_0_95[(1 << 12) * 8];
44
45 impl()
46 {
47 for (uint32_t x = 0; x <= 0x1FFF; ++x)
48 {
49 if (bits::cnt11(x)) { fib2bin_shift[x] = bits::sel11(x, 1) + 1; }
50 else
51 {
52 fib2bin_shift[x] = 0;
53 }
54 }
55 for (uint32_t x = 0; x < 1 << 16; ++x)
56 {
57 uint16_t w = 0;
58 uint32_t offset = 0;
59 if (uint32_t cnt = bits::cnt11(x))
60 {
61 uint32_t y = x;
62 uint32_t fib_pos = 1;
63 do {
64 if (y & 1)
65 {
66 w += bits::lt_fib[fib_pos - 1];
67 if (y & 2)
68 {
69 --cnt;
70 ++offset;
71 fib_pos = 0;
72 y >>= 1;
73 }
74 }
75 ++fib_pos;
76 ++offset;
77 y >>= 1;
78 } while (cnt);
79 }
80 fib2bin_16_greedy[x] = (offset << 11) | w;
81 }
82 for (uint32_t p = 0; p < 8; ++p)
83 {
84 for (uint32_t x = 0; x <= 0xFFF; ++x)
85 {
86 uint64_t w = 0;
87 for (uint32_t j = 0; j < 12 and 12 * p + j < 92; ++j)
88 {
89 if ((x >> j) & 1ULL)
90 {
91 w += bits::lt_fib[12 * p + j];
92 if (x >> (j + 1) & 1ULL) { break; }
93 }
94 }
95 fib2bin_0_95[(p << 12) | x] = w;
96 }
97 }
98 }
99 } data;
100
101 typedef uint64_t size_type;
102
103 static const uint8_t min_codeword_length = 2; // 11 represents 1 and is the code word with minimum length
105
107 static uint8_t encoding_length(uint64_t w);
109 /* \param data Bitstring
110 * \param start_idx Starting index of the decoding.
111 * \param n Number of values to decode from the bitstring.
112 * \param it Iterator
113 */
114 template <bool t_sumup, bool t_inc, class t_iter>
115 static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
116
117 template <bool t_sumup, bool t_inc, class t_iter>
118 static uint64_t decode1(const uint64_t * data,
119 const size_type start_idx,
120 size_type n,
121 t_iter it = (t_iter) nullptr);
122
125
130 static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
131
134
136 static uint64_t decode_prefix_sum(const uint64_t * d,
137 const size_type start_idx,
138 const size_type end_idx,
139 size_type n);
140
141 template <class int_vector1, class int_vector2>
142 static bool encode(const int_vector1 & v, int_vector2 & z);
143
144 template <class int_vector>
145 static uint64_t * raw_data(int_vector & v)
146 {
147 return v.m_data;
148 }
149
151
155 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
156
157 template <class int_vector1, class int_vector2>
158 static bool decode(const int_vector1 & z, int_vector2 & v);
159};
160
161template <typename T>
162inline uint8_t fibonacci<T>::encoding_length(uint64_t w)
163{
164 if (w == 0) { return 93; }
165 // This limit for the leftmost 1bit in the resulting fib code could be improved using a table
166 uint8_t len_1 = bits::hi(w); // len-1 of the fib code
167 while (++len_1 < (uint8_t)(sizeof(bits::lt_fib) / sizeof(bits::lt_fib[0])) && w >= bits::lt_fib[len_1])
168 ;
169 return len_1 + 1;
170}
171
172template <typename T>
173template <class int_vector1, class int_vector2>
174inline bool fibonacci<T>::encode(const int_vector1 & v, int_vector2 & z)
175{
176 uint64_t z_bit_size = 0;
177 uint64_t w;
178 const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
179 for (typename int_vector1::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
180 {
181 if ((w = *it) == 0)
182 {
183 if (v.width() < 64) { w = zero_val; }
184 }
185 z_bit_size += encoding_length(w);
186 }
187 z.bit_resize(z_bit_size);
188 z.shrink_to_fit();
189 if (z_bit_size & 0x3F)
190 { // if z_bit_size % 64 != 0
191 *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
192 }
193 uint64_t * z_data = z.m_data;
194 uint8_t offset = 0;
195 uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
196 uint64_t t;
197 for (typename int_vector1::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
198 {
199 w = *it;
200 if (w == 0) { w = zero_val; }
201 int8_t len_1 = encoding_length(w) - 1, j;
202 fibword_low = 0x0000000000000001ULL;
203
204 if (len_1 >= 64)
205 { // length > 65
206 fibword_high = 0x0000000000000001ULL;
207 j = len_1 - 1;
208 if (w == 0)
209 { // handle special case
210 fibword_high <<= 1;
211 fibword_high |= 1;
212 fibword_high <<= 1;
213 w -= bits::lt_fib[len_1 - 1];
214 j -= 2;
215 }
216 for (; j > 63; --j)
217 {
218 fibword_high <<= 1;
219 if (w >= (t = bits::lt_fib[j]))
220 {
221 w -= t;
222 fibword_high |= 1;
223 if (w and j > 64)
224 {
225 fibword_high <<= 1;
226 --j;
227 }
228 else
229 {
230 fibword_high <<= (64 - j);
231 break;
232 }
233 }
234 }
235 j = 64;
236 }
237 else
238 {
239 j = len_1 - 1;
240 }
241
242 for (; j >= 0; --j)
243 {
244 fibword_low <<= 1;
245 if (w >= (t = bits::lt_fib[j]))
246 {
247 w -= t;
248 fibword_low |= 1;
249 if (w)
250 {
251 fibword_low <<= 1;
252 --j;
253 }
254 else
255 {
256 fibword_low <<= (j);
257 break;
258 }
259 }
260 }
261 if (len_1 >= 64)
262 {
263 bits::write_int_and_move(z_data, fibword_low, offset, 64);
264 bits::write_int_and_move(z_data, fibword_high, offset, len_1 - 63);
265 }
266 else
267 {
268 bits::write_int_and_move(z_data, fibword_low, offset, (len_1 & 0x3F) + 1);
269 }
270 }
271 z.width(v.width());
272 return true;
273}
274
275template <typename T>
276inline void fibonacci<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
277{
278 uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
279 uint64_t t;
280 int8_t len_1 = encoding_length(x) - 1, j;
281 fibword_low = 0x0000000000000001ULL;
282
283 if (len_1 >= 64)
284 { // length > 65
285 fibword_high = 0x0000000000000001ULL;
286 j = len_1 - 1;
287 if (x == 0)
288 { // handle special case
289 fibword_high <<= 1;
290 fibword_high |= 1;
291 fibword_high <<= 1;
292 x -= bits::lt_fib[len_1 - 1];
293 j -= 2;
294 }
295 for (; j > 63; --j)
296 {
297 fibword_high <<= 1;
298 if (x >= (t = bits::lt_fib[j]))
299 {
300 x -= t;
301 fibword_high |= 1;
302 if (x and j > 64)
303 {
304 fibword_high <<= 1;
305 --j;
306 }
307 else
308 {
309 fibword_high <<= (64 - j);
310 break;
311 }
312 }
313 }
314 j = 64;
315 }
316 else
317 {
318 j = len_1 - 1;
319 }
320 for (; j >= 0; --j)
321 {
322 fibword_low <<= 1;
323 if (x >= (t = bits::lt_fib[j]))
324 {
325 x -= t;
326 fibword_low |= 1;
327 if (x)
328 {
329 fibword_low <<= 1;
330 --j;
331 }
332 else
333 {
334 fibword_low <<= (j);
335 break;
336 }
337 }
338 }
339 if (len_1 >= 64)
340 {
341 bits::write_int_and_move(z, fibword_low, offset, 64);
342 bits::write_int_and_move(z, fibword_high, offset, len_1 - 63);
343 }
344 else
345 {
346 bits::write_int_and_move(z, fibword_low, offset, (len_1 & 0x3F) + 1);
347 }
348}
349
350template <typename T>
351template <class int_vector1, class int_vector2>
352inline bool fibonacci<T>::decode(const int_vector1 & z, int_vector2 & v)
353{
354 uint64_t n = 0, carry = 0; // n = number of values to be decoded
355 const uint64_t * data = z.data();
356 // Determine size of v
357 if (z.empty())
358 { // if z is empty we are ready with decoding
359 v.width(z.width());
360 v.resize(0);
361 v.shrink_to_fit();
362 return true;
363 }
364 for (typename int_vector1::size_type i = 0; i < z.bit_data_size() - 1; ++i, ++data)
365 {
366 n += bits::cnt11(*data, carry);
367 }
368 if (z.bit_data_size() << 6 != z.bit_size())
369 {
370 n += bits::cnt11((*data) & bits::lo_set[z.bit_size() & 0x3F], carry);
371 }
372 else
373 {
374 n += bits::cnt11(*data, carry);
375 }
376 v.width(z.width());
377 v.resize(n);
378 v.shrink_to_fit();
379 return decode<false, true>(z.data(), 0, n, v.begin());
380}
381
382template <typename T>
383template <bool t_sumup, bool t_inc, class t_iter>
384inline uint64_t fibonacci<T>::decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it)
385{
386 data += (start_idx >> 6);
387 uint64_t w = 0, value = 0;
388 int8_t buffered = 0; // bits buffered in w, in 0..64
389 int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
390 int8_t shift = 0;
391 uint32_t fibtable = 0;
392 while (n)
393 { // while not all values are decoded
394 while (buffered < 13 and bits::cnt11(w) < n)
395 {
396 w |= (((*data) >> read) << buffered);
397 if (read >= buffered)
398 {
399 ++data;
400 buffered += 64 - read;
401 read = 0;
402 }
403 else
404 { // read < buffered
405 read += 64 - buffered;
406 buffered = 64;
407 }
408 }
409 value += fibonacci<T>::data.fib2bin_0_95[(fibtable << 12) | (w & 0xFFF)];
410 shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
411 if (shift > 0)
412 { // if end of decoding
413 w >>= shift;
414 buffered -= shift;
415 if (t_inc) *(it++) = value;
416 if (!t_sumup and n != 1) value = 0;
417 fibtable = 0;
418 --n;
419 }
420 else
421 { // not end of decoding
422 w >>= 12;
423 buffered -= 12;
424 ++fibtable;
425 }
426 }
427 return value;
428}
429
430template <typename T>
431template <bool t_sumup, bool t_inc, class t_iter>
432inline uint64_t fibonacci<T>::decode1(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
433{
434 d += (start_idx >> 6);
435 uint64_t w = 0, value = 0;
436 int8_t buffered = 0; // bits buffered in w, in 0..64
437 int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
438 int8_t shift = 0;
439 uint32_t fibtable = 0;
440 uint8_t blocknr = (start_idx >> 6) % 9;
441 while (n)
442 { // while not all values are decoded
443 while (buffered < 13 and bits::cnt11(w) < n)
444 {
445 w |= (((*d) >> read) << buffered);
446 if (read >= buffered)
447 {
448 ++blocknr;
449 ++d;
450 if (blocknr == 8)
451 {
452 ++d;
453 blocknr = 0;
454 }
455 buffered += 64 - read;
456 read = 0;
457 }
458 else
459 { // read < buffered
460 read += 64 - buffered;
461 buffered = 64;
462 }
463 }
464 value += fibonacci<T>::data.fib2bin_0_95[(fibtable << 12) | (w & 0xFFF)];
465 shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
466 if (shift > 0)
467 { // if end of decoding
468 w >>= shift;
469 buffered -= shift;
470 if (t_inc) *(it++) = value;
471 if (!t_sumup) value = 0;
472 fibtable = 0;
473 --n;
474 }
475 else
476 { // not end of decoding
477 w >>= 12;
478 buffered -= 12;
479 ++fibtable;
480 }
481 }
482 return value;
483}
484
485template <typename T>
486inline uint64_t fibonacci<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
487{
488 if (n == 0) return 0;
489 // return decode<true,false,int*>(data, start_idx, n);
490 d += (start_idx >> 6);
491 size_type i = 0;
492 int32_t bits_to_decode = 0;
493 uint64_t w = 0, value = 0;
494 int16_t buffered = 0, read = start_idx & 0x3F, shift = 0;
495 uint16_t temp = 0;
496 uint64_t carry = 0;
497 i = bits::cnt11(*d & ~bits::lo_set[read], carry);
498 if (i < n)
499 {
500 uint64_t oldcarry;
501 w = 0;
502 do {
503 oldcarry = carry;
504 i += (temp = bits::cnt11(*(d + (++w)), carry));
505 } while (i < n);
506 bits_to_decode += ((w - 1) << 6) + bits::sel11(*(d + w), n - (i - temp), oldcarry) + 65 - read;
507 w = 0;
508 }
509 else
510 { // i>=n
511 bits_to_decode = bits::sel11(*d >> read, n) + 1;
512 }
513 if (((size_type)bits_to_decode) == n << 1) return n;
514 if (((size_type)bits_to_decode) == (n << 1) + 1) return n + 1;
515 i = 0;
516 // while( bits_to_decode > 0 or buffered > 0){// while not all values are decoded
517 do {
518 while (buffered < 64 and bits_to_decode > 0)
519 {
520 w |= (((*d) >> read) << buffered);
521 if (read >= buffered)
522 {
523 ++d;
524 buffered += 64 - read;
525 bits_to_decode -= (64 - read);
526 read = 0;
527 }
528 else
529 { // read buffered
530 read += 64 - buffered;
531 bits_to_decode -= (64 - buffered);
532 buffered = 64;
533 }
534 if (bits_to_decode < 0)
535 {
536 buffered += bits_to_decode;
537 w &= bits::lo_set[buffered];
538 bits_to_decode = 0;
539 }
540 }
541 if (!i)
542 { // try do decode multiple values
543 if ((w & 0xFFFFFF) == 0xFFFFFF)
544 {
545 value += 12;
546 w >>= 24;
547 buffered -= 24;
548 if ((w & 0xFFFFFF) == 0xFFFFFF)
549 {
550 value += 12;
551 w >>= 24;
552 buffered -= 24;
553 }
554 }
555 do {
556 temp = fibonacci<T>::data.fib2bin_16_greedy[w & 0xFFFF];
557 if ((shift = (temp >> 11)) > 0)
558 {
559 value += (temp & 0x7FFULL);
560 w >>= shift;
561 buffered -= shift;
562 }
563 else
564 {
565 value += fibonacci<T>::data.fib2bin_0_95[w & 0xFFF];
566 w >>= 12;
567 buffered -= 12;
568 i = 1;
569 break;
570 }
571 } while (buffered > 15);
572 }
573 else
574 { // i > 0
575 value += fibonacci<T>::data.fib2bin_0_95[(i << 12) | (w & 0xFFF)];
576 shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
577 if (shift > 0)
578 { // if end of decoding
579 w >>= shift;
580 buffered -= shift;
581 i = 0;
582 }
583 else
584 { // not end of decoding
585 w >>= 12;
586 buffered -= 12;
587 ++i;
588 }
589 }
590 } while (bits_to_decode > 0 or buffered > 0);
591 return value;
592}
593
594template <typename T>
595inline uint64_t fibonacci<T>::decode_prefix_sum(const uint64_t * d,
596 const size_type start_idx,
597 SDSL_UNUSED const size_type end_idx,
598 size_type n)
599{
600 return decode_prefix_sum(d, start_idx, n);
601}
602
603template <typename T>
605
606} // end namespace coder
607} // end namespace sdsl
608#endif
A class to encode and decode between Fibonacci and binary code.
static uint64_t * raw_data(int_vector &v)
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Fibonacci encoded bits beginning at start_idx in the bitstring "data".
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Fibonacci encoded integers beginning at start_idx in the bitstring "data" and return the sum...
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, const size_type end_idx, size_type n)
Decode n Fibonacci encoded integers beginning at start_idx and ending at end_idx (exclusive) in the b...
static uint64_t decode1(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
static struct sdsl::coder::fibonacci::impl data
static bool encode(const int_vector1 &v, int_vector2 &z)
static const uint8_t min_codeword_length
static uint8_t encoding_length(uint64_t w)
Get the number of bits that are necessary to encode the value w in Fibonacci code.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
#define SDSL_UNUSED
Definition: config.hpp:13
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
static constexpr uint64_t lt_fib[92]
Array containing Fibonacci numbers less than .
Definition: bits.hpp:69
static constexpr uint32_t sel11(uint64_t x, uint32_t i, uint32_t c=0)
Calculates the position of the i-th rightmost 11-bit-pattern which terminates a Fibonacci coded integ...
Definition: bits.hpp:711
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:742
static constexpr uint32_t cnt11(uint64_t x, uint64_t &c)
Count the number of consecutive and distinct 11 in the 64bit integer x.
Definition: bits.hpp:541
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187