9#ifndef INCLUDED_SDSL_WT_EPR
10#define INCLUDED_SDSL_WT_EPR
31template <u
int8_t alphabet_size,
class rank_type = rank_support_
int_v<alphabet_size>,
class t_tree_strat =
byte_tree<>>
50 static constexpr bool has_inblock_text = std::is_same<rank_type, rank_support_int_v<alphabet_size>>::value;
58 template <
bool has_inblock_text_>
59 auto construct_init_rank_select(
int_vector<> intermediate_bitvector) -> std::enable_if_t<has_inblock_text_, void>
62 m_bv_rank = rank_type{ &intermediate_bitvector };
66 template <
bool has_inblock_text_>
67 auto construct_init_rank_select(
int_vector<> intermediate_bitvector) -> std::enable_if_t<!has_inblock_text_, void>
69 m_bv = std::move(intermediate_bitvector);
70 m_bv_rank = rank_type{ &m_bv };
74 template <
bool has_inblock_text_>
75 auto value_at(
size_type const position)
const -> std::enable_if_t<has_inblock_text_, value_type>
77 assert(position <
size());
78 return m_bv_rank.value_at(position);
82 template <
bool has_inblock_text_>
83 auto value_at(
size_type const position)
const -> std::enable_if_t<!has_inblock_text_, value_type>
85 assert(position <
size());
86 return m_bv[position];
102 template <
typename t_it>
106 if (0 == m_size)
return;
111 std::vector<size_type> C;
118 if (m_sigma > alphabet_size)
119 throw std::domain_error{
"The given text uses an alphabet that is larger than the explicitly given "
124 intermediate_bitvector.
width(std::ceil(std::log2(m_sigma)));
125 intermediate_bitvector.resize(m_size);
127 std::copy(
begin,
end, intermediate_bitvector.begin());
130 construct_init_rank_select<has_inblock_text>(std::move(intermediate_bitvector));
133 template <
typename t_it>
141 , m_sigma(wt.m_sigma)
143 , m_bv_rank(wt.m_bv_rank)
145 m_bv_rank.set_vector(&m_bv);
150 , m_sigma(wt.m_sigma)
151 , m_bv(std::move(wt.m_bv))
152 , m_bv_rank(std::move(wt.m_bv_rank))
154 m_bv_rank.set_vector(&m_bv);
163 *
this = std::move(tmp);
174 m_sigma = wt.m_sigma;
175 m_bv = std::move(wt.m_bv);
176 m_bv_rank = std::move(wt.m_bv_rank);
177 m_bv_rank.set_vector(&m_bv);
186 bool empty()
const {
return m_size == 0; }
200 return value_at<has_inblock_text>(i);
216 return m_bv_rank.rank(i, c);
232 return std::make_pair(m_bv_rank.rank(i, value), value);
275 template <
class t_ret_type = std::tuple<
size_type,
size_type,
size_type>>
278 assert(i <= j and j <=
size());
287 size_type prefix_i_c = m_bv_rank.prefix_rank(i, c);
289 size_type greater = j - i - m_bv_rank.prefix_rank(j, c) + prefix_i_c;
292 prefix_i_c_1 = m_bv_rank.prefix_rank(i, c - 1);
293 smaller = m_bv_rank.prefix_rank(j, c - 1) - prefix_i_c_1;
297 return t_ret_type{
rank, smaller, greater };
311 template <
class t_ret_type = std::tuple<
size_type,
size_type>>
317 if (c > 0) prefix_count_smaller = m_bv_rank.prefix_rank(i, c - 1);
318 return t_ret_type{ m_bv_rank.prefix_rank(i, c) - prefix_count_smaller, prefix_count_smaller };
332 written_bytes +=
write_member(m_size, out, child,
"size");
333 written_bytes +=
write_member(m_sigma, out, child,
"sigma");
334 written_bytes += m_bv.
serialize(out, child,
"bv");
335 written_bytes += m_bv_rank.serialize(out, child,
"bv_rank");
337 return written_bytes;
346 m_bv_rank.load(in, &m_bv);
352 return (lhs.m_size == rhs.m_size) && (lhs.m_sigma == rhs.m_sigma) && (lhs.m_bv == rhs.m_bv) &&
353 (lhs.m_bv_rank == rhs.m_bv_rank);
359 template <
typename archive_t>
368 template <
typename archive_t>
375 m_bv_rank.set_vector(&m_bv);
A generic vector class for integers of width .
void load(std::istream &in)
Load the int_vector for a stream.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
An EPR-dictionary based wavelet.
int_vector ::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
bool empty() const
Returns whether the wavelet tree contains no data.
wt_epr(const wt_epr &wt)
Copy constructor.
auto operator[](size_type const i) const
Recovers the i-th symbol of the original vector.
wt_epr()=default
Default constructor.
int_vector ::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
t_ret_type lex_smaller_count(size_type i, value_type c) const
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
friend bool operator!=(wt_epr const &lhs, wt_epr const &rhs) noexcept
Inequality operator.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type size() const
Returns the size of the original vector.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
t_ret_type lex_count(size_type i, size_type j, value_type c) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
wt_epr(t_it begin, t_it end)
Construct the EPR-dictionary from a sequence defined by two interators.
wt_epr & operator=(const wt_epr &wt)
Assignment operator.
int_vector ::value_type value_type
random_access_const_iterator< wt_epr > const_iterator
wt_epr(t_it begin, t_it end, std::string)
byte_alphabet_tag alphabet_category
void load(std::istream &in)
Loads the data structure from the given istream.
wt_epr & operator=(wt_epr &&wt)
Move assignment operator.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
t_tree_strat::template type< wt_epr > tree_strat_type
friend bool operator==(wt_epr const &lhs, wt_epr const &rhs) noexcept
Equality operator.
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
rank_support_int.hpp contains classes that support a sdsl::int_vector with constant time rank informa...