SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
rank_support_int_v.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT_V
10#define INCLUDED_SDSL_RANK_SUPPORT_INT_V
11
12#include <array>
13
14#include <sdsl/io.hpp>
16
17namespace sdsl
18{
19namespace detail
20{
21
31template <typename value_t, size_t bits_per_value>
33{
34 private:
35 static_assert(bits_per_value <= 64, "The maximum bit size is 64 for a value.");
36
38 static constexpr uint64_t max_size = (sizeof(uint64_t) << 3) / bits_per_value;
40 static constexpr uint64_t bit_mask = bits::lo_set[bits_per_value];
41
43 uint64_t word{};
44
45 public:
47 using size_type = size_t;
48
61
71 template <typename it_t>
72 constexpr bit_compressed_word(it_t it, it_t end) noexcept
73 {
74 assign(it, end);
75 }
76
81 constexpr value_t operator[](size_t const index) const noexcept
82 {
83 assert(index < max_size);
84 uint64_t offset = index * bits_per_value;
85 return static_cast<value_t>((word >> offset) & bit_mask);
86 }
87
91 template <typename it_t>
92 constexpr void assign(it_t it, it_t end) noexcept
93 {
94 assert(static_cast<uint64_t>(std::distance(it, end)) <= max_size);
95
96 for (size_t index = 0; it != end; ++it, ++index)
97 {
98 uint64_t offset = index * bits_per_value;
99 word = (word & ~(bit_mask << offset)) | uint64_t{ *it } << offset;
100 }
101 }
102
104 constexpr operator uint64_t() const noexcept { return word; }
105
107 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const
108 {
109 structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this));
110 size_type written_bytes = sdsl::serialize(word, out, child, "compressed_word");
111 structure_tree::add_size(child, written_bytes);
112 return written_bytes;
113 }
114
116 void load(std::istream & in) { sdsl::load(word, in); }
117
119 template <typename archive_t>
120 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
121 {
122 ar(CEREAL_NVP(word));
123 }
124
126 template <typename archive_t>
127 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
128 {
129 ar(CEREAL_NVP(word));
130 }
131};
132} // namespace detail
133} // namespace sdsl
134
136namespace sdsl
137{
138
155template <uint8_t alphabet_size, uint8_t words_per_block = 1, uint8_t blocks_per_superblock = 4>
156class rank_support_int_v : public rank_support_int<alphabet_size>
157{
158 private:
161
162 // Import sigma specific constants from base class.
164 using base_t::sigma;
165 using base_t::sigma_bits;
166
168 static constexpr uint64_t values_per_word{ 64ULL / sigma_bits };
170 static constexpr uint64_t values_per_block{ words_per_block * values_per_word };
172 static constexpr uint64_t values_per_superblock{ blocks_per_superblock * values_per_block };
174 static constexpr uint64_t words_per_superblock{ words_per_block * blocks_per_superblock };
176 static constexpr uint64_t effective_alphabet_size = alphabet_size - 1;
177
178 struct superblock_entry;
179
181 std::vector<superblock_entry> superblocks{};
183 typename base_t::size_type text_size{};
184
185 public:
187 using typename base_t::size_type;
189 using typename base_t::value_type;
190
200 explicit rank_support_int_v(const int_vector<> * text_ptr = nullptr)
201 : rank_support_int<alphabet_size>(nullptr)
202 {
203 static_assert(blocks_per_superblock > 1, "There must be at least two blocks per superblock!");
204
205 if (text_ptr == nullptr || text_ptr->empty()) return;
206
207 text_size = text_ptr->size();
208
209 // NOTE: number of elements is artificially increased by one because rank can be called on m_v[size()]
210 uint64_t const word_count = (text_size + values_per_word - 1) / values_per_word;
211 size_type const superblock_count = (word_count + words_per_superblock - 1) / words_per_superblock;
212
213 // Buffers to keep track of the cumulative sums for the superblocks and blocks inside of a superblock.
214 std::array<uint64_t, effective_alphabet_size> buf_blocks{};
215 std::array<uint64_t, effective_alphabet_size> buf_superblocks{};
216
217 // Allocate memory for the superblocks.
218 superblocks.resize(superblock_count);
219
220 // Iterate over the superblock entries and initialise them.
221 auto text_slice_it = text_ptr->begin();
222 uint64_t word_id = 0; // We basically iterate over all words of the underlying text.
223 for (auto entry_it = superblocks.begin(); entry_it != superblocks.end(); ++entry_it)
224 {
225 // First initialise the superblock text.
226 for (auto & compressed_word : entry_it->superblock_text)
227 {
228 // Get the text slice that can be stored in one word.
229 auto text_slice_end = std::next(text_slice_it,
230 std::min<size_t>(std::distance(text_slice_it, text_ptr->end()),
231 values_per_word));
232 compressed_word.assign(text_slice_it, text_slice_end); // Assign text slice to compressed word.
233 text_slice_it = text_slice_end; // Set to next text slice begin.
234 }
235
236 // Second initialise the superblock counts.
237 // The rank values are stored for every symbol of the alphabet in consecutive order.
238 // The last symbol can be ignored since it's prefix sum will always be same as the prefix length.
239 auto superblock_it = entry_it->superblocks.begin(); // Store the begin of the super block in the node.
240 for (size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++superblock_it)
241 {
242 buf_superblocks[letter_rank] += buf_blocks[letter_rank]; // Update sum with previous superblock
243 *superblock_it = buf_superblocks[letter_rank]; // Store the counts.
244 buf_blocks[letter_rank] = 0; // Reset the block counts for the next superblock.
245 }
246
247 // Third initialise the block counts:
248 // The stored block counts represent the cumulative sum of the previous blocks in the super block.
249 // The first block of the superblock is not stored explicitly since it has no predecessor.
250 // A block stores the counts for the letters consecutive in memory from [0..max_letter] and starts then the
251 // next block at offset `i * effective_alphabet_size`, where `i` is the current block id.
252 // TODO: Make the implementation safe for multiple words per block
253 auto text_it = entry_it->superblock_text.begin();
254 for (auto block_it = entry_it->blocks.begin(); word_id < word_count && block_it != entry_it->blocks.end();
255 ++word_id, ++text_it)
256 {
257 // Get the prefix ranks for the current word for each letter and store them in the respective block
258 for (size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++block_it)
259 {
260 buf_blocks[letter_rank] += base_t::full_word_prefix_rank(*text_it, letter_rank);
261 *block_it = buf_blocks[letter_rank];
262 }
263 }
264
265 // Count the last block which is not stored explicitly.
266 if (word_id < word_count)
267 {
268 for (uint64_t letter = 0; letter < effective_alphabet_size; ++letter)
269 buf_blocks[letter] += base_t::full_word_prefix_rank(*text_it, letter);
270
271 ++word_id;
272 }
273 }
274 }
275
286
293 size_type rank(const size_type position, const value_type v) const
294 {
295 switch (v)
296 {
297 case 0: return prefix_rank_impl<false>(position, v);
298 case sigma - 1: return position - prefix_rank_impl<false>(position, v - 1);
299 default: return prefix_rank_impl<true>(position, v);
300 }
301 }
302
304 inline size_type operator()(const size_type position, const value_type v) const { return rank(position, v); }
305
312 size_type prefix_rank(const size_type position, const value_type v) const
313 {
314 assert(position <= text_size);
315 assert(v <= sigma);
316
317 if (unlikely(v == sigma - 1)) return position;
318
319 return prefix_rank_impl<false>(position, v);
320 // TODO: Enable me!
321 // compute in-block queries for all words before the in-block queries
322 // this only applies when multiple words are in one block
323 // if constexpr (words_per_block > 1)
324 // {
325 // size_type const word_id{idx / values_per_word};
326 // uint64_t w{word_id - (word_id % words_per_block)};
327 // while (w < word_id)
328 // {
329 // res += this->full_word_prefix_rank(this->m_v->data(), w, v);
330 // ++w;
331 // }
332 // // std::cout << "res3=" << res << '\n';
333 // }
334 }
335
337 size_type size() const { return text_size; }
338
342 value_type value_at(const size_type position) const
343 {
344 assert(position < text_size);
345 return superblocks[to_superblock_position(position)].value_at(position);
346 }
347
349 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const
350 {
351 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
352 size_type written_bytes = sdsl::serialize(superblocks, out, child, "superblocks_vector");
353 written_bytes += write_member(text_size, out, child, "text_size");
354 structure_tree::add_size(child, written_bytes);
355 return written_bytes;
356 }
357
359 void load(std::istream & in, const int_vector<> * /*v*/)
360 {
361 this->m_v = nullptr;
362 sdsl::load(superblocks, in);
363 read_member(text_size, in);
364 }
365
367 friend bool operator==(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept
368 {
369 return (lhs.superblocks == rhs.superblocks) && (lhs.text_size == rhs.text_size);
370 }
371
373 friend bool operator!=(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept
374 {
375 return !(lhs == rhs);
376 }
377
379 template <typename archive_t>
380 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
381 {
382 ar(CEREAL_NVP(superblocks));
383 ar(CEREAL_NVP(text_size));
384 }
385
387 template <typename archive_t>
388 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
389 {
390 ar(CEREAL_NVP(superblocks));
391 ar(CEREAL_NVP(text_size));
392 }
393
395 void set_vector(const int_vector<> * /*other_text*/) {
396 } // TODO: Check where this interface is needed, since it is dangerous?
397 // I would be able to reset the text without recomputing the rank support structure which is in general a
398 // bad design.
399
400 private:
405 constexpr size_type to_superblock_position(size_t const position) const noexcept
406 {
407 return position / values_per_superblock;
408 }
409
415 template <bool compute_prefix_delta>
416 size_type prefix_rank_impl(size_type const position, const value_type v) const
417 {
418 assert(position <= text_size);
419
420 if (unlikely(text_size == 0)) // TODO: Maybe there could be some logic in the constructor for this case?
421 return 0;
422
423 superblock_entry const & entry = superblocks[to_superblock_position(position)];
424 return entry.template superblock_rank<compute_prefix_delta>(v) +
425 entry.template block_rank<compute_prefix_delta>(position, v) +
426 entry.template in_block_rank<compute_prefix_delta>(position, v);
427
428 // TODO: Enable me!
429 // if constexpr (words_per_block > 1)
430 // {
431 // size_type const word_id{position / values_per_word};
432 // uint64_t w{word_id - (word_id % words_per_block)};
433 // while (w < word_id)
434 // {
435 // res_upper += this->full_word_prefix_rank(this->m_v->data(), w, v);
436 // res_lower += this->full_word_prefix_rank(this->m_v->data(), w, v - 1);
437 // ++w;
438 // }
439 // }
440 }
441};
442
453template <uint8_t alphabet_size, uint8_t words_per_block, uint8_t blocks_per_superblock>
454struct rank_support_int_v<alphabet_size, words_per_block, blocks_per_superblock>::superblock_entry
455{
456 using size_type = typename base_t::size_type;
458 static constexpr size_t block_offset = effective_alphabet_size;
460 static constexpr size_t bits_per_block_value = ceil_log2(values_per_superblock);
462 using block_value_type = std::conditional_t<bits_per_block_value <= 8,
463 uint8_t,
464 std::conditional_t<bits_per_block_value <= 16, uint16_t, uint32_t>>;
465
467 std::array<uint64_t, (alphabet_size - 1)> superblocks;
469 std::array<block_value_type, (blocks_per_superblock - 1) * (alphabet_size - 1)> blocks;
471 std::array<detail::bit_compressed_word<uint8_t, sigma_bits>, words_per_superblock> superblock_text;
472
478 template <bool compute_prefix_delta>
479 constexpr size_type superblock_rank(value_type const v) const noexcept
480 {
481 return superblocks[v] - ((compute_prefix_delta) ? superblocks[v - 1] : 0);
482 }
483
495 template <bool compute_prefix_delta>
496 constexpr size_type block_rank(size_t const position, value_type const v) const noexcept
497 {
498 size_type const block_id = block_position_in_superblock(position);
499 size_type const block_position = absolute_block_position(block_id) + v;
500 return (block_id != 0) * (blocks[block_position] - ((compute_prefix_delta) ? blocks[block_position - 1] : 0));
501 }
502
513 template <bool compute_prefix_delta>
514 constexpr size_type in_block_rank(size_t const position, value_type const v) const noexcept
515 {
516 // First, get the local bit position within the data of the super block.
517 size_type const bit_pos = absolute_bit_position(position);
518 // Second, compute the word that contains this value.
519 uint64_t const word = superblock_text[absolute_word_position(bit_pos)];
520 // Third, compute the in-block rank given the current word.
521 return (position % values_per_block != 0) * word_prefix_rank<compute_prefix_delta>(word, bit_pos, v);
522 }
523
527 value_type value_at(size_type position) const noexcept
528 {
529 size_type bit_position = absolute_bit_position(position);
530 return superblock_text[absolute_word_position(bit_position)][position % values_per_word];
531 }
532
534 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const
535 {
536 structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this));
537 size_type written_bytes = 0;
538 written_bytes += sdsl::serialize(superblocks.size(), out, child, "prefix_superblock_counts");
539 for (const auto & x : superblocks) written_bytes += sdsl::serialize(x, out, child, "[]");
540
541 written_bytes += sdsl::serialize(blocks.size(), out, child, "prefix_block_counts");
542 for (const auto & x : blocks) written_bytes += sdsl::serialize(x, out, child, "[]");
543
544 written_bytes += sdsl::serialize(superblock_text.size(), out, child, "superblock_text");
545 for (const auto & x : superblock_text) written_bytes += sdsl::serialize(x, out, child, "[]");
546
547 structure_tree::add_size(child, written_bytes);
548 return written_bytes;
549 }
550
552 void load(std::istream & in)
553 {
554 size_type array_size;
555 sdsl::load(array_size, in);
556 assert(array_size == superblocks.size());
557 for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(superblocks[idx], in);
558
559 sdsl::load(array_size, in);
560 assert(array_size == blocks.size());
561 for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(blocks[idx], in);
562
563 sdsl::load(array_size, in);
564 assert(array_size == superblock_text.size());
565 for (size_type idx = 0; idx < array_size; ++idx) sdsl::load(superblock_text[idx], in);
566 }
567
569 friend bool operator==(superblock_entry const & lhs, superblock_entry const & rhs) noexcept
570 {
571 return (lhs.superblocks == rhs.superblocks) && (lhs.blocks == rhs.blocks) &&
572 (lhs.superblock_text == rhs.superblock_text);
573 }
574
576 friend bool operator!=(superblock_entry const & lhs, superblock_entry const & rhs) noexcept
577 {
578 return !(lhs == rhs);
579 }
580
582 template <typename archive_t>
583 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
584 {
585 ar(CEREAL_NVP(superblocks));
586 ar(CEREAL_NVP(blocks));
587 ar(CEREAL_NVP(superblock_text));
588 }
589
591 template <typename archive_t>
592 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
593 {
594 ar(CEREAL_NVP(superblocks));
595 ar(CEREAL_NVP(blocks));
596 ar(CEREAL_NVP(superblock_text));
597 }
598
599 private:
601 static constexpr size_type block_position_in_superblock(size_t const position) noexcept
602 { // if constexpr (blocks_per_superblock power of 2)
603 return (position / values_per_block) % blocks_per_superblock;
604 }
605
607 static constexpr size_type absolute_block_position(size_t const block_position) noexcept
608 {
609 return (block_position + (block_position == 0) - 1) * block_offset;
610 }
611
613 static constexpr size_type absolute_bit_position(size_t const position) noexcept
614 {
615 return (position % values_per_superblock) * sigma_bits;
616 }
617
619 static constexpr size_type absolute_word_position(size_t const bit_position) noexcept
620 { // We don't care if it overflows as we protect against it later.
621 return bit_position / bits_per_word;
622 }
623
625 template <bool compute_prefix_delta>
626 static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) ->
627 typename std::enable_if<compute_prefix_delta, size_type>::type
628 {
629 auto && prefix_rank = base_t::word_prefix_rank(word, bit_pos, v - 1, v);
630 return prefix_rank[1] - prefix_rank[0];
631 }
632
634 template <bool compute_prefix_delta>
635 static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) ->
636 typename std::enable_if<!compute_prefix_delta, size_type>::type
637 {
638 return base_t::word_prefix_rank(word, bit_pos, v)[0];
639 }
640};
641
642} // end namespace sdsl
643
644#endif // end file
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
void load(std::istream &in)
Loads from the stream.
bit_compressed_word(bit_compressed_word const &)=default
The copy constructor.
bit_compressed_word()=default
The default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.
bit_compressed_word & operator=(bit_compressed_word &&)=default
The move assignment.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
constexpr void assign(it_t it, it_t end) noexcept
Assigns a range to the word.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
constexpr value_t operator[](size_t const index) const noexcept
Extracts the value from the given index.
~bit_compressed_word()=default
The destructor.
constexpr bit_compressed_word(it_t it, it_t end) noexcept
Constructs from a range of values.
bit_compressed_word(bit_compressed_word &&)=default
The move constructor.
bit_compressed_word & operator=(bit_compressed_word const &)=default
The copy assignment.
size_t size_type
The size type needed for serialisation.
A rank structure proposed by Christopher Pockrandt.
rank_support_int_v(rank_support_int_v &&)=default
Defaulted move constructor.
size_type rank(const size_type position, const value_type v) const
Counts the occurrences of v in the prefix [0..idx-1].
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
size_type operator()(const size_type position, const value_type v) const
Alias for rank(position, v)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.
rank_support_int_v & operator=(const rank_support_int_v &)=default
Defaulted copy assignment.
friend bool operator==(rank_support_int_v const &lhs, rank_support_int_v const &rhs) noexcept
Equality operator.
rank_support_int_v & operator=(rank_support_int_v &&)=default
Defaulted move assignment.
~rank_support_int_v()=default
Defaulted destructor.
size_type prefix_rank(const size_type position, const value_type v) const
Counts the occurrences of elements smaller or equal to v in the prefix [0..idx-1].
int_vector ::value_type value_type
size_type size() const
Returns the size of the original text.
void load(std::istream &in, const int_vector<> *)
Loads from the stream.
friend bool operator!=(rank_support_int_v const &lhs, rank_support_int_v const &rhs) noexcept
Inequality operator.
rank_support_int_v(const int_vector<> *text_ptr=nullptr)
Constructs and initialises the rank support structure for the given text.
void set_vector(const int_vector<> *)
Does nothing for the rank_support_int structure.
int_vector ::size_type size_type
value_type value_at(const size_type position) const
Returns the text value at the given position.
rank_support_int_v(const rank_support_int_v &)=default
Defaulted copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
The base class of classes supporting rank_queries for a sdsl::int_vector in constant time.
static constexpr uint8_t sigma_bits
static constexpr std::array< uint64_t, sizeof...(value_t)> word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
static constexpr uint8_t sigma
const int_vector * m_v
Pointer to the rank supported bit_vector.
static constexpr uint8_t bits_per_word
int_vector ::value_type value_type
static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
int_vector ::size_type size_type
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
io.hpp contains some methods for reading/writing sdsl structures.
Namespace for the succinct data structure library.
std::enable_if< has_serialize< X >::value, typenameX::size_type >::type serialize(const X &x, std::ostream &out, structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:131
constexpr size_t ceil_log2(size_t const n)
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::enable_if< has_load< X >::value, void >::type load(X &x, std::istream &in)
Definition: io.hpp:154
rank_support_int.hpp contains classes that support a sdsl::int_vector with constant time rank informa...
#define unlikely(x)
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
Stores a superblock entry in a cache friendly pattern.
value_type value_at(size_type position) const noexcept
Extracts the value at the given position.
std::array< detail::bit_compressed_word< uint8_t, sigma_bits >, words_per_superblock > superblock_text
The array storing the bit compressed text.
typename base_t::size_type size_type
friend bool operator==(superblock_entry const &lhs, superblock_entry const &rhs) noexcept
Equality operator.
std::array< block_value_type,(blocks_per_superblock - 1) *(alphabet_size - 1)> blocks
The array storing the block values.
constexpr size_type superblock_rank(value_type const v) const noexcept
Returns the rank value from the superblock.
constexpr size_type block_rank(size_t const position, value_type const v) const noexcept
Returns the rank value from the block.
void load(std::istream &in)
Loads from the stream.
constexpr size_type in_block_rank(size_t const position, value_type const v) const noexcept
Returns the rank value from the in-block query.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
std::conditional_t< bits_per_block_value<=8, uint8_t, std::conditional_t< bits_per_block_value<=16, uint16_t, uint32_t > > block_value_type
The smallest integer type needed to store the block ranks.
friend bool operator!=(superblock_entry const &lhs, superblock_entry const &rhs) noexcept
Inequality operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.