SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
coder_elias_delta.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_ELIAS_DELTA
9#define SDSL_CODER_ELIAS_DELTA
10
11#include <assert.h>
12#include <stdint.h>
13
14#include <sdsl/bits.hpp>
15
16namespace sdsl
17{
18
19namespace coder
20{
21
23template <typename T = void>
24class elias_delta
25{
26public:
27 typedef uint64_t size_type;
28
29 static struct impl
30 {
32
36 uint32_t prefixsum[1 << 16];
37
38 uint16_t prefixsum_8bit[(1 << 8) * 8];
39
40 impl()
41 {
42 // initialize prefixsum
43 for (uint64_t x = 0; x < (1 << 16); ++x)
44 {
45 uint64_t const * w = &x; // copy of x
46 uint64_t value = 0;
47 uint16_t numbers = 0, offset = 0, offset2 = 0;
48 while ((x >> offset) != 0)
49 {
50 uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
51 if (len_1_len == 0)
52 {
53 offset += 1;
54 value += 1;
55 ++numbers;
56 }
57 else
58 {
59 offset2 = offset + len_1_len + 1;
60 len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
61 offset2 += len_1_len;
62 if (offset2 + len - 1 <= 16)
63 {
64 value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
65 offset = offset2 + len - 1;
66 ++numbers;
67 }
68 else
69 break;
70 }
71 }
72 uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
73 // number of decoded values,
74 // and the last 16 bit equals value of decoded prefix sum
75 result = (offset << 24) | (numbers << 16) | value;
76 if (value > 0)
77 assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
78 prefixsum[x] = result;
79 }
80 // initialize prefixsum_8bit
81
82 for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
83 {
84 for (uint64_t x = 0; x < (1 << 8); ++x)
85 {
86 uint64_t const * w = &x; // copy of x
87 uint64_t value = 0;
88 uint32_t numbers = 0, offset = 0, offset2 = 0;
89 while ((x >> offset) != 0 and numbers < maxi)
90 {
91 uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
92 if (len_1_len == 0)
93 {
94 offset += 1;
95 value += 1;
96 ++numbers;
97 }
98 else
99 {
100 offset2 = offset + len_1_len + 1;
101 len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
102 offset2 += len_1_len;
103 if (offset2 + len - 1 <= 8)
104 {
105 value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
106 offset = offset2 + len - 1;
107 ++numbers;
108 }
109 else
110 break;
111 }
112 }
113 uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
114 // number of decoded values,
115 // and the last 16 bit equals value of decoded prefix sum
116 result = (offset << 8) | (numbers << 4) | value;
117 prefixsum_8bit[idx++] = result;
118 }
119 }
120 }
121 } data;
122
123 static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
124 static uint8_t encoding_length(uint64_t);
126 /* \param data Bitstring
127 * \param start_idx Starting index of the decoding.
128 * \param n Number of values to decode from the bitstring.
129 * \param it Iterator to decode the values.
130 */
131 template <bool t_sumup, bool t_inc, class t_iter>
132 static uint64_t decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
133
136
141 static uint64_t decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n);
142 static uint64_t
143 decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n);
144
145 template <class int_vector>
146 static bool encode(int_vector const & v, int_vector & z);
147 template <class int_vector>
148 static bool decode(int_vector const & z, int_vector & v);
149
151 /* \param x Positive integer to encode.
152 * \param z Raw data of vector to write the encoded form of x.
153 * \param start_idx Beginning bit index to write the encoded form ox x in z.
154 */
155 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
156
157 template <class int_vector>
158 static uint64_t * raw_data(int_vector & v)
159 {
160 return v.m_data;
161 }
162};
163
164// \sa coder::elias_delta::encoding_length
165template <typename T>
166inline uint8_t elias_delta<T>::encoding_length(uint64_t w)
167{
168 uint8_t len_1 = w ? bits::hi(w) : 64;
169 return len_1 + (bits::hi(len_1 + 1) << 1) + 1;
170}
171
172template <typename T>
173template <class int_vector>
175{
176 typedef typename int_vector::size_type size_type;
177 z.width(v.width());
178 size_type z_bit_size = 0;
179 uint64_t w;
180 const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
181 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
182 {
183 if ((w = *it) == 0)
184 {
185 w = zero_val;
186 }
187 z_bit_size += encoding_length(w);
188 }
189 z.bit_resize(z_bit_size); // Initial size of z
190 z.shrink_to_fit();
191 if (z_bit_size & 0x3F)
192 { // if z_bit_size % 64 != 0
193 *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
194 }
195 z_bit_size = 0;
196 uint64_t * z_data = z.m_data;
197 uint8_t offset = 0;
198 size_type len, len_1_len; // TODO: change to uint8_t and test it
199 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
200 {
201 w = *it;
202 if (w == 0)
203 {
204 w = zero_val;
205 }
206 // (number of bits to represent w)
207 len = w ? bits::hi(w) + 1 : 65;
208 // (number of bits to represent the length of w) -1
209 len_1_len = bits::hi(len);
210 // Write unary representation for the length of the length of w
211 bits::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len + 1);
212 if (len_1_len)
213 {
214 bits::write_int_and_move(z_data, len, offset, len_1_len);
215 bits::write_int_and_move(z_data, w, offset, len - 1);
216 }
217 }
218 return true;
219}
220
221template <typename T>
222inline void elias_delta<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
223{
224 uint8_t len, len_1_len;
225 // (number of bits to represent w)
226 len = x ? bits::hi(x) + 1 : 65;
227 // (number of bits to represent the length of w) - 1
228 len_1_len = bits::hi(len);
229 // Write unary representation for the length of the length of w
230 bits::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len + 1);
231 if (len_1_len)
232 {
233 bits::write_int_and_move(z, len, offset, len_1_len);
234 bits::write_int_and_move(z, x, offset, len - 1);
235 }
236}
237
238template <typename T>
239inline uint64_t
240elias_delta<T>::decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n)
241{
242 if (n == 0)
243 return 0;
244 uint64_t const * lastdata = d + ((end_idx + 63) >> 6);
245 d += (start_idx >> 6);
246 uint64_t w = 0, value = 0;
247 int16_t buffered = 0, read = start_idx & 0x3F;
248 size_type i = 0;
249 if (n + read <= 64)
250 {
251 if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n])
252 return n;
253 }
254 else
255 { // n+read > 64
256 if ((*d >> read) == bits::lo_set[64 - read])
257 { // all bits are set to 1
258 value = 64 - read;
259 ++d;
260 n -= (64 - read);
261 read = 0;
262 while (n >= 64)
263 {
264 if (*d == 0xFFFFFFFFFFFFFFFFULL)
265 {
266 value += 64;
267 ++d;
268 n -= 64;
269 }
270 else
271 goto start_decoding;
272 }
273 // 0 <= n <= 64
274 if ((*d & bits::lo_set[n]) == bits::lo_set[n])
275 return value + n;
276 }
277 }
278
279start_decoding:
280 while (i < n)
281 {
282 while (buffered < 64 and d < lastdata)
283 {
284 fill_buffer:
285 w |= (((*d) >> read) << buffered);
286 if (read >= buffered)
287 {
288 ++d;
289 buffered += 64 - read;
290 read = 0;
291 }
292 else
293 { // read buffered
294 read += 64 - buffered;
295 buffered = 64;
296 }
297 }
298 /* if(w==0xFFFFFFFFFFFFFFFFULL){
299 * i += 64;
300 * value += 64;
301 * if(i >= n)
302 * return value - (i-n);
303 * buffered = 0;
304 * w = 0;
305 * // continue;
306 * goto fill_buffer;
307 * }
308 */
309 // uint32_t rbp = (w == 0xFFFFFFFFFFFFFFFFULL)?64:bits::lo(~w);
310 uint32_t rbp = bits::lo(~w);
311 if (rbp > 0)
312 {
313 i += rbp;
314 value += rbp;
315 if (i >= n)
316 { // decoded too much
317 return value - (i - n); // return corrected value
318 }
319 assert((int64_t)buffered >= rbp);
320 buffered -= rbp;
321 w >>= rbp;
322 if (buffered < 16)
323 // continue;
324 goto fill_buffer;
325 }
326 // assert(w!=0xFFFFFFFFFFFFFFFFULL);
327 // else
328 {
329 // i < n
330 begin_decode:
331 uint32_t psum = elias_delta<T>::data.prefixsum[w & 0x0000FFFF];
332 if (!psum or i + ((psum >> 16) & 0x00FF) > n)
333 {
334 if (w == 0)
335 { // buffer is not full
336 w |= (((*d) >> read) << buffered);
337 if (read >= buffered)
338 {
339 ++d;
340 buffered += 64 - read;
341 read = 0;
342 }
343 else
344 {
345 read += 64 - buffered;
346 buffered = 64;
347 };
348 if (!w)
349 {
350 w |= (((*d) >> read) << buffered);
351 if (read >= buffered)
352 {
353 ++d;
354 buffered += 64 - read;
355 read = 0;
356 }
357 else
358 {
359 read += 64 - buffered;
360 buffered = 64;
361 };
362 }
363 }
364 // assert(w>0);
365 uint16_t len_1_len = bits::lo(w); // read length of length
366 buffered -= (len_1_len + 1);
367 w >>= (len_1_len + 1);
368 if (len_1_len > buffered)
369 { // buffer is not full
370 w |= (((*d) >> read) << buffered);
371 if (read >= buffered)
372 {
373 ++d;
374 buffered += 64 - read;
375 read = 0;
376 }
377 else
378 {
379 read += 64 - buffered;
380 buffered = 64;
381 };
382 if (len_1_len > buffered)
383 {
384 w |= (((*d) >> read) << buffered);
385 if (read >= buffered)
386 {
387 ++d;
388 buffered += 64 - read;
389 read = 0;
390 }
391 else
392 {
393 read += 64 - buffered;
394 buffered = 64;
395 };
396 }
397 }
398 // assert(len_1_len <= buffered);
399 uint16_t len_1 = (w & bits::lo_set[len_1_len]) + (1ULL << len_1_len) - 1;
400 buffered -= len_1_len;
401 w >>= len_1_len;
402 if (len_1 > buffered)
403 { // buffer is not full
404 w |= (((*d) >> read) << buffered);
405 if (read >= buffered)
406 {
407 ++d;
408 buffered += 64 - read;
409 read = 0;
410 }
411 else
412 {
413 read += 64 - buffered;
414 buffered = 64;
415 };
416 if (len_1 > buffered)
417 {
418 w |= (((*d) >> read) << buffered);
419 if (read >= buffered)
420 {
421 ++d;
422 buffered += 64 - read;
423 read = 0;
424 }
425 else
426 {
427 read += 64 - buffered;
428 buffered = 64;
429 };
430 }
431 }
432 // if( len_1 > buffered ){
433 // std::cerr<<"len_1="<<len_1<<" buffered = "<<buffered<<std::endl;
434 // }
435 // assert(len_1 <= buffered);
436 value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << (len_1));
437 buffered -= len_1;
438 if (len_1 < 64)
439 {
440 w >>= len_1;
441 }
442 else
443 {
444 w = 0;
445 }
446 ++i;
447 if (i == n)
448 return value;
449 if (buffered >= 16)
450 goto begin_decode;
451 }
452 else
453 {
454 value += (psum & 0x0000FFFF);
455 i += ((psum >> 16) & 0x00FF);
456 if (i == n)
457 return value;
458 buffered -= (psum >> 24);
459 w >>= (psum >> 24);
460 if (buffered >= 16)
461 goto begin_decode;
462 }
463 }
464 // std::cerr<<i<<" / "<<n<<std::endl;
465 };
466 // std::cerr<<value<<std::endl;
467 return value;
468}
469
470template <typename T>
471inline uint64_t elias_delta<T>::decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n)
472{
473 if (n == 0)
474 return 0;
475 d += (start_idx >> 6);
476 uint64_t value = 0;
477 size_type i = 0;
478 uint8_t offset = start_idx & 0x3F;
479
480 if (n < 24)
481 {
482 if (n + offset <= 64)
483 {
484 if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n])
485 return n;
486 }
487 else
488 { // n+offset > 64
489 if ((*d >> offset) == bits::lo_set[64 - offset])
490 { // all bits are set to 1
491 value = 64 - offset;
492 ++d;
493 n -= (64 - offset);
494 offset = 0;
495 while (n >= 64)
496 {
497 if (*d == 0xFFFFFFFFFFFFFFFFULL)
498 {
499 value += 64;
500 ++d;
501 n -= 64;
502 }
503 else
504 {
505 uint8_t temp = bits::lo(~(*d));
506 value += temp;
507 n -= temp;
508 offset = temp;
509 goto start_decoding;
510 }
511 }
512 // 0 <= n <= 64
513 if ((*d & bits::lo_set[n]) == bits::lo_set[n])
514 return value + n;
515 }
516 }
517 }
518
519start_decoding:
520
521 while (i < n)
522 { // while not all values are decoded
523 // n-i values to decode
524 if (((*d >> offset) & 0xF) == 0xF)
525 {
526 uint8_t maxdecode = n - i > 63 ? 63 : n - i;
527 uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
528 i += rbp;
529 value += rbp;
530 if (rbp + offset >= 64)
531 {
532 ++d;
533 offset = (rbp + offset) & 0x3F;
534 }
535 else
536 {
537 offset += rbp;
538 }
539 if (rbp == maxdecode)
540 continue;
541 }
542 while (i < n)
543 {
544 uint32_t psum = elias_delta<T>::data.prefixsum[bits::read_int(d, offset, 16)];
545 // if( psum == 0 or i+((psum>>16)&0x00FF) > n ){ // value does not fit in 16 bits
546 if (psum == 0)
547 { // value does not fit in 16 bits
548 goto decode_single;
549 }
550 else if (i + ((psum >> 16) & 0x00FF) > n)
551 { // decoded too much
552 if (n - i <= 8)
553 {
554 psum = elias_delta<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
555 if (psum > 0)
556 {
557 value += (psum & 0xF);
558 i += ((psum >> 4) & 0xF);
559 offset += (psum >> 8);
560 if (offset >= 64)
561 {
562 offset &= 0x3F;
563 ++d;
564 }
565 }
566 }
567 break;
568 }
569 else
570 {
571 value += (psum & 0x0000FFFF);
572 i += ((psum >> 16) & 0x00FF);
573 offset += (psum >> 24);
574 if (offset >= 64)
575 {
576 offset &= 0x3F;
577 ++d;
578 }
579 }
580 }
581 if (i < n)
582 {
583 decode_single:
584 i++;
585 uint16_t len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
586 uint16_t len_1 = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len) - 1;
587 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << (len_1));
588 // std::cout<<"decode single ("<<len_1_len<<","<<len_1<<","<<value<<")"<<std::endl;
589 }
590 }
591 return value;
592}
593
594template <typename T>
595template <class int_vector>
597{
598 typename int_vector::size_type len_1_len, len, n = 0;
599 uint64_t const * z_data = z.data();
600 uint64_t const * z_end = z.data() + (z.bit_size() >> 6);
601 uint8_t offset = 0;
602 while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
603 {
604 len_1_len = bits::read_unary_and_move(z_data, offset);
605 if (len_1_len)
606 {
607 len = bits::read_int_and_move(z_data, offset, len_1_len) + (1ULL << len_1_len);
608 bits::move_right(z_data, offset, len - 1);
609 }
610 ++n;
611 }
612 v.width(z.width());
613 v.resize(n);
614 v.shrink_to_fit();
615 return decode<false, true>(z.data(), 0, n, v.begin());
616}
617
618template <typename T>
619template <bool t_sumup, bool t_inc, class t_iter>
620inline uint64_t elias_delta<T>::decode(uint64_t const * d, const size_type start_idx, size_type n, t_iter it)
621{
622 d += (start_idx >> 6);
623 uint64_t value = 0;
624 size_type i = 0;
625 size_type len_1_len, len;
626 uint8_t offset = start_idx & 0x3F;
627 while (i++ < n)
628 { // while not all values are decoded
629 if (!t_sumup)
630 value = 0;
631 len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
632 if (!len_1_len)
633 {
634 value += 1;
635 }
636 else
637 {
638 len = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len);
639 value += bits::read_int_and_move(d, offset, len - 1) + (len - 1 < 64) * (1ULL << (len - 1));
640 }
641 if (t_inc)
642 *(it++) = value;
643 }
644 return value;
645}
646
647template <typename T>
648typename elias_delta<T>::impl elias_delta<T>::data;
649
650} // end namespace coder
651} // end namespace sdsl
652#endif
bits.hpp contains the sdsl::bits class.
A class to encode and decode between Elias- and binary code.
static struct sdsl::coder::elias_delta::impl data
static const uint8_t min_codeword_length
static uint8_t encoding_length(uint64_t)
static uint64_t decode(uint64_t const *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
static uint64_t * raw_data(int_vector &v)
static uint64_t decode_prefix_sum(uint64_t const *d, const size_type start_idx, size_type n)
Decode n Elias delta encoded integers beginning at start_idx in the bitstring "data" and return the s...
static bool encode(int_vector const &v, int_vector &z)
A generic vector class for integers of width .
Definition io.hpp:36
int_vector_size_type size_type
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
void shrink_to_fit()
Free unused allocated memory.
void resize(const size_type size)
Resize the int_vector in terms of elements.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type bit_size() const noexcept
The number of bits in the int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Namespace for the succinct data structure library.
static constexpr uint64_t read_int_bounded(uint64_t const *word, uint8_t offset=0, const uint8_t len=64)
Definition bits.hpp:793
static constexpr void move_right(uint64_t const *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition bits.hpp:893
static constexpr uint64_t read_unary_and_move(uint64_t const *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition bits.hpp:857
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition bits.hpp:689
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition bits.hpp:749
static constexpr uint64_t read_int(uint64_t const *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition bits.hpp:777
static constexpr uint64_t read_unary_bounded(uint64_t const *word, uint8_t offset=0)
Definition bits.hpp:843
static constexpr uint64_t read_int_and_move(uint64_t const *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition bits.hpp:799
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:194