SDSL 3.0.2
Succinct Data Structure Library
Loading...
Searching...
No Matches
bit_vector_il.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef SDSL_BIT_VECTOR_IL
10#define SDSL_BIT_VECTOR_IL
11
12#include <algorithm>
13#include <assert.h>
14#include <iosfwd>
15#include <queue>
16#include <stdint.h>
17#include <string>
18#include <type_traits>
19
20#include <sdsl/bits.hpp>
21#include <sdsl/cereal.hpp>
22#include <sdsl/int_vector.hpp>
23#include <sdsl/io.hpp>
24#include <sdsl/iterators.hpp>
27#include <sdsl/util.hpp>
28
30namespace sdsl
31{
32
33template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
34class rank_support_il; // in bit_vector_il
35template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
36class select_support_il; // in bit_vector_il
37
38template <class T>
39constexpr bool power_of_two(T x)
40{
41 return std::is_integral<T>::value and x > 1 and !(x & (x - 1));
42}
43
45
53template <uint32_t t_bs = 512>
55{
56 static_assert(t_bs >= 64, "bit_vector_il: blocksize must be be at least 64 bits.");
57 static_assert(power_of_two(t_bs), "bit_vector_il: blocksize must be a power of two.");
58
59public:
66
67 friend class rank_support_il<1, t_bs>;
68 friend class rank_support_il<0, t_bs>;
69 friend class select_support_il<1, t_bs>;
70 friend class select_support_il<0, t_bs>;
71
76
77private:
78 size_type m_size = 0;
79 size_type m_block_num = 0;
80 size_type m_superblocks = 0;
81 size_type m_block_shift = 0;
82 int_vector<64> m_data;
83 int_vector<64> m_rank_samples;
84
85 // precondition: m_rank_samples.size() <= m_superblocks
86 void init_rank_samples()
87 {
88 uint32_t blockSize_U64 = bits::hi(t_bs >> 6);
89 size_type idx = 0;
90 std::queue<size_type> lbs, rbs;
91 lbs.push(0);
92 rbs.push(m_superblocks);
93 while (!lbs.empty())
94 {
95 size_type lb = lbs.front();
96 lbs.pop();
97 size_type rb = rbs.front();
98 rbs.pop();
99 if (/*lb < rb and*/ idx < m_rank_samples.size())
100 {
101 size_type mid = lb + (rb - lb) / 2; // select mid \in [lb..rb)
102 size_type pos = (mid << blockSize_U64) + mid;
103 m_rank_samples[idx++] = m_data[pos];
104 lbs.push(lb);
105 rbs.push(mid);
106 lbs.push(mid + 1);
107 rbs.push(rb);
108 }
109 }
110 }
111
112public:
115 bit_vector_il(bit_vector_il const &) = default;
119
121 {
122 m_size = bv.size();
123 /* calculate the number of superblocks */
124 // each block of size > 0 gets suberblock in which we store the cumulative sum up to this block
125 m_superblocks = (m_size + t_bs) / t_bs;
126 m_block_shift = bits::hi(t_bs);
127 /* allocate new data */
128 size_type blocks = (m_size + 64) / 64;
129 size_type mem = blocks + m_superblocks + 1;
130 // ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^
131 // bit vector data | cum. sum data | sum after last block
132 m_data = int_vector<64>(mem);
133 m_block_num = mem;
134
135 /* assign data and calculate super block values */
136 uint64_t const * bvp = bv.data();
137
138 size_type j = 0; // 64-bit word counter in the m_data
139 size_type cum_sum = 0;
140 size_type sample_rate = t_bs / 64;
141 for (size_type i = 0, sample_cnt = sample_rate; i < blocks; ++i, ++sample_cnt)
142 {
143 if (sample_cnt == sample_rate)
144 {
145 m_data[j] = cum_sum;
146 sample_cnt = 0;
147 j++;
148 }
149 m_data[j] = bvp[i];
150 cum_sum += bits::cnt(m_data[j]);
151 j++;
152 }
153 m_data[j] = cum_sum; /* last superblock so we can always
154 get num_ones fast */
155 if (m_block_num > 1024 * 64)
156 {
157 // we store at most m_superblocks+1 rank_samples:
158 // we do a cache efficient binary search for the select on X=1024
159 // or X=the smallest power of two smaller than m_superblock
160 m_rank_samples.resize(std::min(1024ULL, 1ULL << bits::hi(m_superblocks)));
161 }
162 init_rank_samples();
163 }
164
166
172 {
173 assert(i < m_size);
174 size_type bs = i >> m_block_shift;
175 size_type block = bs + (i >> 6) + 1;
176 return ((m_data[block] >> (i & 63)) & 1ULL);
177 }
178
180
187 uint64_t get_int(size_type idx, uint8_t len = 64) const
188 {
189 assert(idx + len - 1 < m_size);
190 size_type bs = idx >> m_block_shift;
191 size_type b_block = bs + (idx >> 6) + 1;
192 bs = (idx + len - 1) >> m_block_shift;
193 size_type e_block = bs + ((idx + len - 1) >> 6) + 1;
194 if (b_block == e_block)
195 { // spans on block
196 return (m_data[b_block] >> (idx & 63)) & bits::lo_set[len];
197 }
198 else
199 { // spans two blocks
200 uint8_t b_len = 64 - (idx & 63);
201 return (m_data[b_block] >> (idx & 63)) | (m_data[e_block] & bits::lo_set[len - b_len]) << b_len;
202 }
203 }
204
207 {
208 return m_size;
209 }
210
212 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
213 {
214 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
215 size_type written_bytes = 0;
216 written_bytes += write_member(m_size, out, child, "size");
217 written_bytes += write_member(m_block_num, out, child, "block_num");
218 written_bytes += write_member(m_superblocks, out, child, "superblocks");
219 written_bytes += write_member(m_block_shift, out, child, "block_shift");
220 written_bytes += m_data.serialize(out, child, "data");
221 written_bytes += m_rank_samples.serialize(out, child, "rank_samples");
222 structure_tree::add_size(child, written_bytes);
223 return written_bytes;
224 }
225
227 void load(std::istream & in)
228 {
229 read_member(m_size, in);
230 read_member(m_block_num, in);
231 read_member(m_superblocks, in);
232 read_member(m_block_shift, in);
233 m_data.load(in);
234 m_rank_samples.load(in);
235 }
236
237 template <typename archive_t>
238 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
239 {
240 ar(CEREAL_NVP(m_size));
241 ar(CEREAL_NVP(m_block_num));
242 ar(CEREAL_NVP(m_superblocks));
243 ar(CEREAL_NVP(m_block_shift));
244 ar(CEREAL_NVP(m_data));
245 ar(CEREAL_NVP(m_rank_samples));
246 }
247
248 template <typename archive_t>
249 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
250 {
251 ar(CEREAL_NVP(m_size));
252 ar(CEREAL_NVP(m_block_num));
253 ar(CEREAL_NVP(m_superblocks));
254 ar(CEREAL_NVP(m_block_shift));
255 ar(CEREAL_NVP(m_data));
256 ar(CEREAL_NVP(m_rank_samples));
257 }
258
260 {
261 return iterator(this, 0);
262 }
263
264 iterator end() const
265 {
266 return iterator(this, size());
267 }
268
269 bool operator==(bit_vector_il const & v) const
270 {
271 return m_size == v.m_size && m_data == v.m_data;
272 }
273
274 bool operator!=(bit_vector_il const & v) const
275 {
276 return !(*this == v);
277 }
278};
279
280template <uint8_t t_b, uint32_t t_bs>
282{
283 static_assert(t_b == 1 or t_b == 0, "rank_support_il only supports bitpatterns 0 or 1.");
284
285public:
288 enum
289 {
290 bit_pat = t_b
291 };
292 enum
293 {
294 bit_pat_len = (uint8_t)1
295 };
296
297private:
298 bit_vector_type const * m_v;
299 size_type m_block_shift;
300 size_type m_block_mask;
301 size_type m_block_size_U64;
302
303 inline size_type rank1(size_type i) const
304 {
305 size_type SBlockNum = i >> m_block_shift;
306 size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
307 uint64_t resp = m_v->m_data[SBlockPos];
308 uint64_t const * B = (m_v->m_data.data() + (SBlockPos + 1));
309 uint64_t rem = i & 63;
310 uint64_t bits = (i & m_block_mask) - rem;
311 while (bits)
312 {
313 resp += bits::cnt(*B++);
314 bits -= 64;
315 }
316 resp += bits::cnt(*B & bits::lo_set[rem]);
317 return resp;
318 }
319
320 inline size_type rank0(size_type i) const
321 {
322 size_type SBlockNum = i >> m_block_shift;
323 size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
324 uint64_t resp = (SBlockNum << m_block_shift) - m_v->m_data[SBlockPos];
325 uint64_t const * B = (m_v->m_data.data() + (SBlockPos + 1));
326 uint64_t rem = i & 63;
327 uint64_t bits = (i & m_block_mask) - rem;
328 while (bits)
329 {
330 resp += bits::cnt(~(*B));
331 B++;
332 bits -= 64;
333 }
334 resp += bits::cnt((~(*B)) & bits::lo_set[rem]);
335 return resp;
336 }
337
338public:
339 rank_support_il(bit_vector_type const * v = nullptr)
340 {
341 set_vector(v);
342 m_block_shift = bits::hi(t_bs);
343 m_block_mask = t_bs - 1;
344 m_block_size_U64 = bits::hi(t_bs >> 6);
345 }
346
349 {
350 if (t_b)
351 return rank1(i);
352 return rank0(i);
353 }
354
356 {
357 return rank(i);
358 }
359
361 {
362 return m_v->size();
363 }
364
365 void set_vector(bit_vector_type const * v = nullptr)
366 {
367 m_v = v;
368 }
369
371 {
372 if (this != &rs)
373 {
374 set_vector(rs.m_v);
375 }
376 return *this;
377 }
378
379 void load(std::istream &, bit_vector_type const * v = nullptr)
380 {
381 set_vector(v);
382 }
383
384 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
385 {
386 return serialize_empty_object(out, v, name, this);
387 }
388
389 template <typename archive_t>
390 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
391 {}
392
393 template <typename archive_t>
395 {}
396
397 bool operator==(rank_support_il const & other) const noexcept
398 {
399 return (*m_v == *other.m_v);
400 }
401
402 bool operator!=(rank_support_il const & other) const noexcept
403 {
404 return !(*this == other);
405 }
406};
407
408template <uint8_t t_b, uint32_t t_bs>
410{
411 static_assert(t_b == 1 or t_b == 0, "select_support_il only supports bitpatterns 0 or 1.");
412
413public:
416 enum
417 {
418 bit_pat = t_b
419 };
420 enum
421 {
422 bit_pat_len = (uint8_t)1
423 };
424
425private:
426 bit_vector_type const * m_v;
427 size_type m_superblocks;
428 size_type m_block_shift;
429 size_type m_block_size_U64;
430
432 size_type select1(size_type i) const
433 {
434 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
435 size_type res = 0;
436 size_type idx = 0; // index in m_rank_samples
437 /* binary search over super blocks */
438 // invariant: lb==0 or m_data[ pos(lb-1) ] < i
439 // m_data[ pos(rb) ] >= i, initial since i < rank(size())
440 while (lb < rb)
441 {
442 size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
443#ifndef NOSELCACHE
444 if (idx < m_v->m_rank_samples.size())
445 {
446 if (m_v->m_rank_samples[idx] >= i)
447 {
448 idx = (idx << 1) + 1;
449 rb = mid;
450 }
451 else
452 {
453 idx = (idx << 1) + 2;
454 lb = mid + 1;
455 }
456 }
457 else
458 {
459#endif
460 size_type pos = (mid << m_block_size_U64) + mid;
461 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
462 // data blocks to jump superblock position
463 if (m_v->m_data[pos] >= i)
464 {
465 rb = mid;
466 }
467 else
468 {
469 lb = mid + 1;
470 }
471#ifndef NOSELCACHE
472 }
473#endif
474 }
475 res = (rb - 1) << m_block_shift;
476 /* iterate in 64 bit steps */
477 uint64_t const * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
478 i -= *w; // subtract the cumulative sum before the superblock
479 ++w; /* step into the data */
480 size_type ones = bits::cnt(*w);
481 while (ones < i)
482 {
483 i -= ones;
484 ++w;
485 ones = bits::cnt(*w);
486 res += 64;
487 }
488 /* handle last word */
489 res += bits::sel(*w, i);
490 return res;
491 }
492
494 size_type select0(size_type i) const
495 {
496 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
497 size_type res = 0;
498 size_type idx = 0; // index in m_rank_samples
499 /* binary search over super blocks */
500 // invariant: lb==0 or m_data[ pos(lb-1) ] < i
501 // m_data[ pos(rb) ] >= i, initial since i < rank(size())
502 while (lb < rb)
503 {
504 size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
505#ifndef NOSELCACHE
506 if (idx < m_v->m_rank_samples.size())
507 {
508 if (((mid << m_block_shift) - m_v->m_rank_samples[idx]) >= i)
509 {
510 idx = (idx << 1) + 1;
511 rb = mid;
512 }
513 else
514 {
515 idx = (idx << 1) + 2;
516 lb = mid + 1;
517 }
518 }
519 else
520 {
521#endif
522 size_type pos = (mid << m_block_size_U64) + mid;
523 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
524 // data blocks to jump superblock position
525 if (((mid << m_block_shift) - m_v->m_data[pos]) >= i)
526 {
527 rb = mid;
528 }
529 else
530 {
531 lb = mid + 1;
532 }
533#ifndef NOSELCACHE
534 }
535#endif
536 }
537 res = (rb - 1) << m_block_shift;
538
539 /* iterate in 64 bit steps */
540 uint64_t const * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
541 i = i - (res - *w); // substract the cumulative sum before the superblock
542 ++w; /* step into the data */
543 size_type zeros = bits::cnt(~*w);
544 while (zeros < i)
545 {
546 i -= zeros;
547 ++w;
548 zeros = bits::cnt(~*w);
549 res += 64;
550 }
551 /* handle last word */
552 res += bits::sel(~*w, i);
553 return res;
554 }
555
556public:
558 {
559 set_vector(v);
560 m_block_shift = bits::hi(t_bs);
561 m_block_size_U64 = bits::hi(t_bs >> 6);
562 }
563
566 {
567 if (t_b)
568 return select1(i);
569 return select0(i);
570 }
571
573 {
574 return select(i);
575 }
576
578 {
579 return m_v->size();
580 }
581
582 void set_vector(bit_vector_type const * v = nullptr)
583 {
584 m_v = v;
585 }
586
588 {
589 if (this != &rs)
590 {
591 set_vector(rs.m_v);
592 }
593 return *this;
594 }
595
596 void load(std::istream &, bit_vector_type const * v = nullptr)
597 {
598 set_vector(v);
599 }
600
601 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
602 {
603 return serialize_empty_object(out, v, name, this);
604 }
605
606 template <typename archive_t>
607 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
608 {}
609
610 template <typename archive_t>
612 {}
613
614 bool operator==(select_support_il const & other) const noexcept
615 {
616 return (*m_v == *other.m_v);
617 }
618
619 bool operator!=(select_support_il const & other) const noexcept
620 {
621 return !(*this == other);
622 }
623};
624
625} // end namespace sdsl
626#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
A bit vector which interleaves the original bit_vector with rank information.
bit_vector_il(bit_vector_il &&)=default
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void load(std::istream &in)
Loads the data structure from the given istream.
bit_vector::difference_type difference_type
bit_vector_il(bit_vector const &bv)
value_type operator[](size_type i) const
Accessing the i-th element of the original bit_vector.
uint64_t get_int(size_type idx, uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
bit_vector_il & operator=(bit_vector_il &&)=default
bool operator==(bit_vector_il const &v) const
select_support_il< 1, t_bs > select_1_type
bit_vector_il & operator=(bit_vector_il const &)=default
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original bit vector.
select_support_il< 0, t_bs > select_0_type
bit_vector_il(bit_vector_il const &)=default
rank_support_il< 1, t_bs > rank_1_type
bool operator!=(bit_vector_il const &v) const
random_access_const_iterator< bit_vector_il > iterator
iterator begin() const
bit_vector::size_type size_type
iterator end() const
rank_support_il< 0, t_bs > rank_0_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
A generic vector class for integers of width .
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
int_vector_size_type size_type
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Generic iterator for a random access container.
Definition iterators.hpp:24
rank_support_il & operator=(rank_support_il const &rs)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
rank_support_il(bit_vector_type const *v=nullptr)
size_type rank(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
bool operator!=(rank_support_il const &other) const noexcept
size_type operator()(size_type i) const
bool operator==(rank_support_il const &other) const noexcept
size_type size() const
void load(std::istream &, bit_vector_type const *v=nullptr)
bit_vector_il< t_bs > bit_vector_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector::size_type size_type
void set_vector(bit_vector_type const *v=nullptr)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &, bit_vector_type const *v=nullptr)
size_type select(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
select_support_il(bit_vector_type const *v=nullptr)
bool operator!=(select_support_il const &other) const noexcept
size_type operator()(size_type i) const
select_support_il & operator=(select_support_il const &rs)
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
bool operator==(select_support_il const &other) const noexcept
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector_il< t_bs > bit_vector_type
bit_vector::size_type size_type
void set_vector(bit_vector_type const *v=nullptr)
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
bits_impl<> bits
Definition bits.hpp:978
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
constexpr bool power_of_two(T x)
size_t serialize_empty_object(std::ostream &, structure_tree_node *v=nullptr, std::string name="", T const *t=nullptr)
Definition io.hpp:339
Contains declarations and definitions of data structure concepts.
A helper class for bitwise tricks on 64 bit words.
Definition bits.hpp:55
static constexpr uint32_t sel(uint64_t x, uint32_t i)
Calculate the position of the i-th rightmost 1 bit in the 64bit integer x.
Definition bits.hpp:585
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:652
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition bits.hpp:485
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:193
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.