SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
coder_elias_delta.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_ELIAS_DELTA
9#define SDSL_CODER_ELIAS_DELTA
10
11#include <sdsl/int_vector.hpp>
12
13namespace sdsl
14{
15
16namespace coder
17{
18
20template <typename T = void>
22{
23 public:
24 typedef uint64_t size_type;
25
26 static struct impl
27 {
29
33 uint32_t prefixsum[1 << 16];
34
35 uint16_t prefixsum_8bit[(1 << 8) * 8];
36
37 impl()
38 {
39 // initialize prefixsum
40 for (uint64_t x = 0; x < (1 << 16); ++x)
41 {
42 const uint64_t * w = &x; // copy of x
43 uint64_t value = 0;
44 uint16_t numbers = 0, offset = 0, offset2 = 0;
45 while ((x >> offset) != 0)
46 {
47 uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
48 if (len_1_len == 0)
49 {
50 offset += 1;
51 value += 1;
52 ++numbers;
53 }
54 else
55 {
56 offset2 = offset + len_1_len + 1;
57 len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
58 offset2 += len_1_len;
59 if (offset2 + len - 1 <= 16)
60 {
61 value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
62 offset = offset2 + len - 1;
63 ++numbers;
64 }
65 else
66 break;
67 }
68 }
69 uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
70 // number of decoded values,
71 // and the last 16 bit equals value of decoded prefix sum
72 result = (offset << 24) | (numbers << 16) | value;
73 if (value > 0) assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
74 prefixsum[x] = result;
75 }
76 // initialize prefixsum_8bit
77
78 for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
79 {
80 for (uint64_t x = 0; x < (1 << 8); ++x)
81 {
82 const uint64_t * w = &x; // copy of x
83 uint64_t value = 0;
84 uint32_t numbers = 0, offset = 0, offset2 = 0;
85 while ((x >> offset) != 0 and numbers < maxi)
86 {
87 uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0;
88 if (len_1_len == 0)
89 {
90 offset += 1;
91 value += 1;
92 ++numbers;
93 }
94 else
95 {
96 offset2 = offset + len_1_len + 1;
97 len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len);
98 offset2 += len_1_len;
99 if (offset2 + len - 1 <= 8)
100 {
101 value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1));
102 offset = offset2 + len - 1;
103 ++numbers;
104 }
105 else
106 break;
107 }
108 }
109 uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
110 // number of decoded values,
111 // and the last 16 bit equals value of decoded prefix sum
112 result = (offset << 8) | (numbers << 4) | value;
113 prefixsum_8bit[idx++] = result;
114 }
115 }
116 }
117 } data;
118
119 static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
120 static uint8_t encoding_length(uint64_t);
122 /* \param data Bitstring
123 * \param start_idx Starting index of the decoding.
124 * \param n Number of values to decode from the bitstring.
125 * \param it Iterator to decode the values.
126 */
127 template <bool t_sumup, bool t_inc, class t_iter>
128 static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
129
132
137 static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
138 static uint64_t decode_prefix_sum(const uint64_t * d,
139 const size_type start_idx,
140 const size_type end_idx,
141 size_type n);
142
143 template <class int_vector>
144 static bool encode(const int_vector & v, int_vector & z);
145 template <class int_vector>
146 static bool decode(const int_vector & z, int_vector & v);
147
149 /* \param x Positive integer to encode.
150 * \param z Raw data of vector to write the encoded form of x.
151 * \param start_idx Beginning bit index to write the encoded form ox x in z.
152 */
153 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
154
155 template <class int_vector>
156 static uint64_t * raw_data(int_vector & v)
157 {
158 return v.m_data;
159 }
160};
161
162// \sa coder::elias_delta::encoding_length
163template <typename T>
164inline uint8_t elias_delta<T>::encoding_length(uint64_t w)
165{
166 uint8_t len_1 = w ? bits::hi(w) : 64;
167 return len_1 + (bits::hi(len_1 + 1) << 1) + 1;
168}
169
170template <typename T>
171template <class int_vector>
173{
174 typedef typename int_vector::size_type size_type;
175 z.width(v.width());
176 size_type z_bit_size = 0;
177 uint64_t w;
178 const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
179 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
180 {
181 if ((w = *it) == 0) { w = zero_val; }
182 z_bit_size += encoding_length(w);
183 }
184 z.bit_resize(z_bit_size); // Initial size of z
185 z.shrink_to_fit();
186 if (z_bit_size & 0x3F)
187 { // if z_bit_size % 64 != 0
188 *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
189 }
190 z_bit_size = 0;
191 uint64_t * z_data = z.m_data;
192 uint8_t offset = 0;
193 size_type len, len_1_len; // TODO: change to uint8_t and test it
194 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
195 {
196 w = *it;
197 if (w == 0) { w = zero_val; }
198 // (number of bits to represent w)
199 len = w ? bits::hi(w) + 1 : 65;
200 // (number of bits to represent the length of w) -1
201 len_1_len = bits::hi(len);
202 // Write unary representation for the length of the length of w
203 bits::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len + 1);
204 if (len_1_len)
205 {
206 bits::write_int_and_move(z_data, len, offset, len_1_len);
207 bits::write_int_and_move(z_data, w, offset, len - 1);
208 }
209 }
210 return true;
211}
212
213template <typename T>
214inline void elias_delta<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
215{
216 uint8_t len, len_1_len;
217 // (number of bits to represent w)
218 len = x ? bits::hi(x) + 1 : 65;
219 // (number of bits to represent the length of w) - 1
220 len_1_len = bits::hi(len);
221 // Write unary representation for the length of the length of w
222 bits::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len + 1);
223 if (len_1_len)
224 {
225 bits::write_int_and_move(z, len, offset, len_1_len);
226 bits::write_int_and_move(z, x, offset, len - 1);
227 }
228}
229
230template <typename T>
231inline uint64_t elias_delta<T>::decode_prefix_sum(const uint64_t * d,
232 const size_type start_idx,
233 const size_type end_idx,
234 size_type n)
235{
236 if (n == 0) return 0;
237 const uint64_t * lastdata = d + ((end_idx + 63) >> 6);
238 d += (start_idx >> 6);
239 uint64_t w = 0, value = 0;
240 int16_t buffered = 0, read = start_idx & 0x3F;
241 size_type i = 0;
242 if (n + read <= 64)
243 {
244 if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n]) return n;
245 }
246 else
247 { // n+read > 64
248 if ((*d >> read) == bits::lo_set[64 - read])
249 { // all bits are set to 1
250 value = 64 - read;
251 ++d;
252 n -= (64 - read);
253 read = 0;
254 while (n >= 64)
255 {
256 if (*d == 0xFFFFFFFFFFFFFFFFULL)
257 {
258 value += 64;
259 ++d;
260 n -= 64;
261 }
262 else
263 goto start_decoding;
264 }
265 // 0 <= n <= 64
266 if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
267 }
268 }
269
270start_decoding:
271 while (i < n)
272 {
273 while (buffered < 64 and d < lastdata)
274 {
275 fill_buffer:
276 w |= (((*d) >> read) << buffered);
277 if (read >= buffered)
278 {
279 ++d;
280 buffered += 64 - read;
281 read = 0;
282 }
283 else
284 { // read buffered
285 read += 64 - buffered;
286 buffered = 64;
287 }
288 }
289 /* if(w==0xFFFFFFFFFFFFFFFFULL){
290 * i += 64;
291 * value += 64;
292 * if(i >= n)
293 * return value - (i-n);
294 * buffered = 0;
295 * w = 0;
296 * // continue;
297 * goto fill_buffer;
298 * }
299 */
300 // uint32_t rbp = (w == 0xFFFFFFFFFFFFFFFFULL)?64:bits::lo(~w);
301 uint32_t rbp = bits::lo(~w);
302 if (rbp > 0)
303 {
304 i += rbp;
305 value += rbp;
306 if (i >= n)
307 { // decoded too much
308 return value - (i - n); // return corrected value
309 }
310 assert((int64_t)buffered >= rbp);
311 buffered -= rbp;
312 w >>= rbp;
313 if (buffered < 16)
314 // continue;
315 goto fill_buffer;
316 }
317 // assert(w!=0xFFFFFFFFFFFFFFFFULL);
318 // else
319 {
320 // i < n
321 begin_decode:
322 uint32_t psum = elias_delta<T>::data.prefixsum[w & 0x0000FFFF];
323 if (!psum or i + ((psum >> 16) & 0x00FF) > n)
324 {
325 if (w == 0)
326 { // buffer is not full
327 w |= (((*d) >> read) << buffered);
328 if (read >= buffered)
329 {
330 ++d;
331 buffered += 64 - read;
332 read = 0;
333 }
334 else
335 {
336 read += 64 - buffered;
337 buffered = 64;
338 };
339 if (!w)
340 {
341 w |= (((*d) >> read) << buffered);
342 if (read >= buffered)
343 {
344 ++d;
345 buffered += 64 - read;
346 read = 0;
347 }
348 else
349 {
350 read += 64 - buffered;
351 buffered = 64;
352 };
353 }
354 }
355 // assert(w>0);
356 uint16_t len_1_len = bits::lo(w); // read length of length
357 buffered -= (len_1_len + 1);
358 w >>= (len_1_len + 1);
359 if (len_1_len > buffered)
360 { // buffer is not full
361 w |= (((*d) >> read) << buffered);
362 if (read >= buffered)
363 {
364 ++d;
365 buffered += 64 - read;
366 read = 0;
367 }
368 else
369 {
370 read += 64 - buffered;
371 buffered = 64;
372 };
373 if (len_1_len > buffered)
374 {
375 w |= (((*d) >> read) << buffered);
376 if (read >= buffered)
377 {
378 ++d;
379 buffered += 64 - read;
380 read = 0;
381 }
382 else
383 {
384 read += 64 - buffered;
385 buffered = 64;
386 };
387 }
388 }
389 // assert(len_1_len <= buffered);
390 uint16_t len_1 = (w & bits::lo_set[len_1_len]) + (1ULL << len_1_len) - 1;
391 buffered -= len_1_len;
392 w >>= len_1_len;
393 if (len_1 > buffered)
394 { // buffer is not full
395 w |= (((*d) >> read) << buffered);
396 if (read >= buffered)
397 {
398 ++d;
399 buffered += 64 - read;
400 read = 0;
401 }
402 else
403 {
404 read += 64 - buffered;
405 buffered = 64;
406 };
407 if (len_1 > buffered)
408 {
409 w |= (((*d) >> read) << buffered);
410 if (read >= buffered)
411 {
412 ++d;
413 buffered += 64 - read;
414 read = 0;
415 }
416 else
417 {
418 read += 64 - buffered;
419 buffered = 64;
420 };
421 }
422 }
423 // if( len_1 > buffered ){
424 // std::cerr<<"len_1="<<len_1<<" buffered = "<<buffered<<std::endl;
425 // }
426 // assert(len_1 <= buffered);
427 value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << (len_1));
428 buffered -= len_1;
429 if (len_1 < 64) { w >>= len_1; }
430 else
431 {
432 w = 0;
433 }
434 ++i;
435 if (i == n) return value;
436 if (buffered >= 16) goto begin_decode;
437 }
438 else
439 {
440 value += (psum & 0x0000FFFF);
441 i += ((psum >> 16) & 0x00FF);
442 if (i == n) return value;
443 buffered -= (psum >> 24);
444 w >>= (psum >> 24);
445 if (buffered >= 16) goto begin_decode;
446 }
447 }
448 // std::cerr<<i<<" / "<<n<<std::endl;
449 };
450 // std::cerr<<value<<std::endl;
451 return value;
452}
453
454template <typename T>
455inline uint64_t elias_delta<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
456{
457 if (n == 0) return 0;
458 d += (start_idx >> 6);
459 uint64_t value = 0;
460 size_type i = 0;
461 uint8_t offset = start_idx & 0x3F;
462
463 if (n < 24)
464 {
465 if (n + offset <= 64)
466 {
467 if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n]) return n;
468 }
469 else
470 { // n+offset > 64
471 if ((*d >> offset) == bits::lo_set[64 - offset])
472 { // all bits are set to 1
473 value = 64 - offset;
474 ++d;
475 n -= (64 - offset);
476 offset = 0;
477 while (n >= 64)
478 {
479 if (*d == 0xFFFFFFFFFFFFFFFFULL)
480 {
481 value += 64;
482 ++d;
483 n -= 64;
484 }
485 else
486 {
487 uint8_t temp = bits::lo(~(*d));
488 value += temp;
489 n -= temp;
490 offset = temp;
491 goto start_decoding;
492 }
493 }
494 // 0 <= n <= 64
495 if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
496 }
497 }
498 }
499
500start_decoding:
501
502 while (i < n)
503 { // while not all values are decoded
504 // n-i values to decode
505 if (((*d >> offset) & 0xF) == 0xF)
506 {
507 uint8_t maxdecode = n - i > 63 ? 63 : n - i;
508 uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
509 i += rbp;
510 value += rbp;
511 if (rbp + offset >= 64)
512 {
513 ++d;
514 offset = (rbp + offset) & 0x3F;
515 }
516 else
517 {
518 offset += rbp;
519 }
520 if (rbp == maxdecode) continue;
521 }
522 while (i < n)
523 {
524 uint32_t psum = elias_delta<T>::data.prefixsum[bits::read_int(d, offset, 16)];
525 // if( psum == 0 or i+((psum>>16)&0x00FF) > n ){ // value does not fit in 16 bits
526 if (psum == 0)
527 { // value does not fit in 16 bits
528 goto decode_single;
529 }
530 else if (i + ((psum >> 16) & 0x00FF) > n)
531 { // decoded too much
532 if (n - i <= 8)
533 {
534 psum = elias_delta<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
535 if (psum > 0)
536 {
537 value += (psum & 0xF);
538 i += ((psum >> 4) & 0xF);
539 offset += (psum >> 8);
540 if (offset >= 64)
541 {
542 offset &= 0x3F;
543 ++d;
544 }
545 }
546 }
547 break;
548 }
549 else
550 {
551 value += (psum & 0x0000FFFF);
552 i += ((psum >> 16) & 0x00FF);
553 offset += (psum >> 24);
554 if (offset >= 64)
555 {
556 offset &= 0x3F;
557 ++d;
558 }
559 }
560 }
561 if (i < n)
562 {
563 decode_single:
564 i++;
565 uint16_t len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
566 uint16_t len_1 = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len) - 1;
567 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << (len_1));
568 // std::cout<<"decode single ("<<len_1_len<<","<<len_1<<","<<value<<")"<<std::endl;
569 }
570 }
571 return value;
572}
573
574template <typename T>
575template <class int_vector>
577{
578 typename int_vector::size_type len_1_len, len, n = 0;
579 const uint64_t * z_data = z.data();
580 const uint64_t * z_end = z.data() + (z.bit_size() >> 6);
581 uint8_t offset = 0;
582 while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
583 {
584 len_1_len = bits::read_unary_and_move(z_data, offset);
585 if (len_1_len)
586 {
587 len = bits::read_int_and_move(z_data, offset, len_1_len) + (1ULL << len_1_len);
588 bits::move_right(z_data, offset, len - 1);
589 }
590 ++n;
591 }
592 v.width(z.width());
593 v.resize(n);
594 v.shrink_to_fit();
595 return decode<false, true>(z.data(), 0, n, v.begin());
596}
597
598template <typename T>
599template <bool t_sumup, bool t_inc, class t_iter>
600inline uint64_t elias_delta<T>::decode(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
601{
602 d += (start_idx >> 6);
603 uint64_t value = 0;
604 size_type i = 0;
605 size_type len_1_len, len;
606 uint8_t offset = start_idx & 0x3F;
607 while (i++ < n)
608 { // while not all values are decoded
609 if (!t_sumup) value = 0;
610 len_1_len = bits::read_unary_and_move(d, offset); // read length of length of x
611 if (!len_1_len) { value += 1; }
612 else
613 {
614 len = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len);
615 value += bits::read_int_and_move(d, offset, len - 1) + (len - 1 < 64) * (1ULL << (len - 1));
616 }
617 if (t_inc) *(it++) = value;
618 }
619 return value;
620}
621
622template <typename T>
624
625} // end namespace coder
626} // end namespace sdsl
627#endif
A class to encode and decode between Elias- and binary code.
static bool encode(const int_vector &v, int_vector &z)
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Elias delta encoded integers beginning at start_idx in the bitstring "data" and return the s...
static struct sdsl::coder::elias_delta::impl data
static const uint8_t min_codeword_length
static uint8_t encoding_length(uint64_t)
static uint64_t * raw_data(int_vector &v)
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
A generic vector class for integers of width .
Definition: int_vector.hpp:216
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:764
int_vector_size_type size_type
Definition: int_vector.hpp:229
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:547
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:506
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
static constexpr uint64_t read_int(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition: bits.hpp:770
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition: bits.hpp:686
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:742
static constexpr uint64_t read_int_bounded(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Definition: bits.hpp:786
static constexpr uint64_t read_unary_and_move(const uint64_t *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition: bits.hpp:842
static constexpr uint64_t read_unary_bounded(const uint64_t *word, uint8_t offset=0)
Definition: bits.hpp:831
static constexpr uint64_t read_int_and_move(const uint64_t *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition: bits.hpp:792
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
static constexpr void move_right(const uint64_t *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition: bits.hpp:877