9#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT_V
10#define INCLUDED_SDSL_RANK_SUPPORT_INT_V
31template <
typename value_t,
size_t bits_per_value>
35 static_assert(bits_per_value <= 64,
"The maximum bit size is 64 for a value.");
38 static constexpr uint64_t max_size = (
sizeof(uint64_t) << 3) / bits_per_value;
40 static constexpr uint64_t bit_mask =
bits::lo_set[bits_per_value];
71 template <
typename it_t>
81 constexpr value_t
operator[](
size_t const index)
const noexcept
83 assert(index < max_size);
84 uint64_t offset = index * bits_per_value;
85 return static_cast<value_t
>((word >> offset) & bit_mask);
91 template <
typename it_t>
92 constexpr void assign(it_t it, it_t end)
noexcept
94 assert(
static_cast<uint64_t
>(std::distance(it, end)) <= max_size);
96 for (
size_t index = 0; it != end; ++it, ++index)
98 uint64_t offset = index * bits_per_value;
99 word = (word & ~(bit_mask << offset)) | uint64_t{ *it } << offset;
104 constexpr operator uint64_t() const noexcept {
return word; }
112 return written_bytes;
119 template <
typename archive_t>
126 template <
typename archive_t>
155template <u
int8_t alphabet_size, u
int8_t words_per_block = 1, u
int8_t blocks_per_superblock = 4>
168 static constexpr uint64_t values_per_word{ 64ULL / sigma_bits };
170 static constexpr uint64_t values_per_block{ words_per_block * values_per_word };
172 static constexpr uint64_t values_per_superblock{ blocks_per_superblock * values_per_block };
174 static constexpr uint64_t words_per_superblock{ words_per_block * blocks_per_superblock };
176 static constexpr uint64_t effective_alphabet_size = alphabet_size - 1;
181 std::vector<superblock_entry> superblocks{};
203 static_assert(blocks_per_superblock > 1,
"There must be at least two blocks per superblock!");
205 if (text_ptr ==
nullptr || text_ptr->empty())
return;
207 text_size = text_ptr->size();
210 uint64_t
const word_count = (text_size + values_per_word - 1) / values_per_word;
211 size_type const superblock_count = (word_count + words_per_superblock - 1) / words_per_superblock;
214 std::array<uint64_t, effective_alphabet_size> buf_blocks{};
215 std::array<uint64_t, effective_alphabet_size> buf_superblocks{};
218 superblocks.resize(superblock_count);
221 auto text_slice_it = text_ptr->begin();
222 uint64_t word_id = 0;
223 for (
auto entry_it = superblocks.begin(); entry_it != superblocks.end(); ++entry_it)
226 for (
auto & compressed_word : entry_it->superblock_text)
229 auto text_slice_end = std::next(text_slice_it,
230 std::min<size_t>(std::distance(text_slice_it, text_ptr->end()),
232 compressed_word.assign(text_slice_it, text_slice_end);
233 text_slice_it = text_slice_end;
239 auto superblock_it = entry_it->superblocks.begin();
240 for (
size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++superblock_it)
242 buf_superblocks[letter_rank] += buf_blocks[letter_rank];
243 *superblock_it = buf_superblocks[letter_rank];
244 buf_blocks[letter_rank] = 0;
253 auto text_it = entry_it->superblock_text.begin();
254 for (
auto block_it = entry_it->blocks.begin(); word_id < word_count && block_it != entry_it->blocks.end();
255 ++word_id, ++text_it)
258 for (
size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++block_it)
261 *block_it = buf_blocks[letter_rank];
266 if (word_id < word_count)
268 for (uint64_t letter = 0; letter < effective_alphabet_size; ++letter)
297 case 0:
return prefix_rank_impl<false>(position, v);
298 case sigma - 1:
return position - prefix_rank_impl<false>(position, v - 1);
299 default:
return prefix_rank_impl<true>(position, v);
314 assert(position <= text_size);
317 if (
unlikely(v == sigma - 1))
return position;
319 return prefix_rank_impl<false>(position, v);
344 assert(position < text_size);
345 return superblocks[to_superblock_position(position)].value_at(position);
353 written_bytes +=
write_member(text_size, out, child,
"text_size");
355 return written_bytes;
369 return (lhs.superblocks == rhs.superblocks) && (lhs.text_size == rhs.text_size);
375 return !(lhs == rhs);
379 template <
typename archive_t>
387 template <
typename archive_t>
405 constexpr size_type to_superblock_position(
size_t const position)
const noexcept
407 return position / values_per_superblock;
415 template <
bool compute_prefix_delta>
418 assert(position <= text_size);
423 superblock_entry
const & entry = superblocks[to_superblock_position(position)];
424 return entry.template superblock_rank<compute_prefix_delta>(v) +
425 entry.template block_rank<compute_prefix_delta>(position, v) +
426 entry.template in_block_rank<compute_prefix_delta>(position, v);
453template <u
int8_t alphabet_size, u
int8_t words_per_block, u
int8_t blocks_per_superblock>
458 static constexpr size_t block_offset = effective_alphabet_size;
460 static constexpr size_t bits_per_block_value =
ceil_log2(values_per_superblock);
464 std::conditional_t<bits_per_block_value <= 16, uint16_t, uint32_t>>;
467 std::array<uint64_t, (alphabet_size - 1)> superblocks;
471 std::array<detail::bit_compressed_word<uint8_t, sigma_bits>, words_per_superblock>
superblock_text;
478 template <
bool compute_prefix_delta>
481 return superblocks[v] - ((compute_prefix_delta) ? superblocks[v - 1] : 0);
495 template <
bool compute_prefix_delta>
498 size_type const block_id = block_position_in_superblock(position);
499 size_type const block_position = absolute_block_position(block_id) + v;
500 return (block_id != 0) * (blocks[block_position] - ((compute_prefix_delta) ? blocks[block_position - 1] : 0));
513 template <
bool compute_prefix_delta>
517 size_type const bit_pos = absolute_bit_position(position);
519 uint64_t
const word = superblock_text[absolute_word_position(bit_pos)];
521 return (position % values_per_block != 0) * word_prefix_rank<compute_prefix_delta>(word, bit_pos, v);
529 size_type bit_position = absolute_bit_position(position);
530 return superblock_text[absolute_word_position(bit_position)][position % values_per_word];
538 written_bytes +=
sdsl::serialize(superblocks.size(), out, child,
"prefix_superblock_counts");
539 for (
const auto & x : superblocks) written_bytes +=
sdsl::serialize(x, out, child,
"[]");
541 written_bytes +=
sdsl::serialize(blocks.size(), out, child,
"prefix_block_counts");
542 for (
const auto & x : blocks) written_bytes +=
sdsl::serialize(x, out, child,
"[]");
544 written_bytes +=
sdsl::serialize(superblock_text.size(), out, child,
"superblock_text");
545 for (
const auto & x : superblock_text) written_bytes +=
sdsl::serialize(x, out, child,
"[]");
548 return written_bytes;
556 assert(array_size == superblocks.size());
560 assert(array_size == blocks.size());
564 assert(array_size == superblock_text.size());
571 return (lhs.superblocks == rhs.superblocks) && (lhs.blocks == rhs.blocks) &&
572 (lhs.superblock_text == rhs.superblock_text);
578 return !(lhs == rhs);
582 template <
typename archive_t>
591 template <
typename archive_t>
601 static constexpr size_type block_position_in_superblock(
size_t const position)
noexcept
603 return (position / values_per_block) % blocks_per_superblock;
607 static constexpr size_type absolute_block_position(
size_t const block_position)
noexcept
609 return (block_position + (block_position == 0) - 1) * block_offset;
613 static constexpr size_type absolute_bit_position(
size_t const position)
noexcept
615 return (position % values_per_superblock) * sigma_bits;
619 static constexpr size_type absolute_word_position(
size_t const bit_position)
noexcept
621 return bit_position / bits_per_word;
625 template <
bool compute_prefix_delta>
627 typename std::enable_if<compute_prefix_delta, size_type>::type
634 template <
bool compute_prefix_delta>
636 typename std::enable_if<!compute_prefix_delta, size_type>::type
void load(std::istream &in)
Loads from the stream.
bit_compressed_word(bit_compressed_word const &)=default
The copy constructor.
bit_compressed_word()=default
The default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.
bit_compressed_word & operator=(bit_compressed_word &&)=default
The move assignment.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
constexpr void assign(it_t it, it_t end) noexcept
Assigns a range to the word.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
constexpr value_t operator[](size_t const index) const noexcept
Extracts the value from the given index.
~bit_compressed_word()=default
The destructor.
constexpr bit_compressed_word(it_t it, it_t end) noexcept
Constructs from a range of values.
bit_compressed_word(bit_compressed_word &&)=default
The move constructor.
bit_compressed_word & operator=(bit_compressed_word const &)=default
The copy assignment.
size_t size_type
The size type needed for serialisation.
A rank structure proposed by Christopher Pockrandt.
rank_support_int_v(rank_support_int_v &&)=default
Defaulted move constructor.
size_type rank(const size_type position, const value_type v) const
Counts the occurrences of v in the prefix [0..idx-1].
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
size_type operator()(const size_type position, const value_type v) const
Alias for rank(position, v)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.
rank_support_int_v & operator=(const rank_support_int_v &)=default
Defaulted copy assignment.
friend bool operator==(rank_support_int_v const &lhs, rank_support_int_v const &rhs) noexcept
Equality operator.
rank_support_int_v & operator=(rank_support_int_v &&)=default
Defaulted move assignment.
~rank_support_int_v()=default
Defaulted destructor.
size_type prefix_rank(const size_type position, const value_type v) const
Counts the occurrences of elements smaller or equal to v in the prefix [0..idx-1].
int_vector ::value_type value_type
size_type size() const
Returns the size of the original text.
void load(std::istream &in, const int_vector<> *)
Loads from the stream.
friend bool operator!=(rank_support_int_v const &lhs, rank_support_int_v const &rhs) noexcept
Inequality operator.
rank_support_int_v(const int_vector<> *text_ptr=nullptr)
Constructs and initialises the rank support structure for the given text.
void set_vector(const int_vector<> *)
Does nothing for the rank_support_int structure.
int_vector ::size_type size_type
value_type value_at(const size_type position) const
Returns the text value at the given position.
rank_support_int_v(const rank_support_int_v &)=default
Defaulted copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
The base class of classes supporting rank_queries for a sdsl::int_vector in constant time.
static constexpr uint8_t sigma_bits
static constexpr std::array< uint64_t, sizeof...(value_t)> word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
static constexpr uint8_t sigma
const int_vector * m_v
Pointer to the rank supported bit_vector.
static constexpr uint8_t bits_per_word
int_vector ::value_type value_type
static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
int_vector ::size_type size_type
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
io.hpp contains some methods for reading/writing sdsl structures.
Namespace for the succinct data structure library.
std::enable_if< has_serialize< X >::value, typenameX::size_type >::type serialize(const X &x, std::ostream &out, structure_tree_node *v=nullptr, std::string name="")
constexpr size_t ceil_log2(size_t const n)
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
std::enable_if< has_load< X >::value, void >::type load(X &x, std::istream &in)
rank_support_int.hpp contains classes that support a sdsl::int_vector with constant time rank informa...
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Stores a superblock entry in a cache friendly pattern.
value_type value_at(size_type position) const noexcept
Extracts the value at the given position.
std::array< detail::bit_compressed_word< uint8_t, sigma_bits >, words_per_superblock > superblock_text
The array storing the bit compressed text.
typename base_t::size_type size_type
friend bool operator==(superblock_entry const &lhs, superblock_entry const &rhs) noexcept
Equality operator.
std::array< block_value_type,(blocks_per_superblock - 1) *(alphabet_size - 1)> blocks
The array storing the block values.
constexpr size_type superblock_rank(value_type const v) const noexcept
Returns the rank value from the superblock.
constexpr size_type block_rank(size_t const position, value_type const v) const noexcept
Returns the rank value from the block.
void load(std::istream &in)
Loads from the stream.
constexpr size_type in_block_rank(size_t const position, value_type const v) const noexcept
Returns the rank value from the in-block query.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Loads from the archive.
std::conditional_t< bits_per_block_value<=8, uint8_t, std::conditional_t< bits_per_block_value<=16, uint16_t, uint32_t > > block_value_type
The smallest integer type needed to store the block ranks.
friend bool operator!=(superblock_entry const &lhs, superblock_entry const &rhs) noexcept
Inequality operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Saves to the archive.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, const std::string name="") const
Saves to the stream.