9#ifndef INCLUDED_SDSL_WT_AP
10#define INCLUDED_SDSL_WT_AP
36template <
class t_wt_
byte = wt_huff<bit_vector, rank_support_v5<>>,
class t_wt_
int = wm_
int<>>
39 static_assert(std::is_same<typename index_tag<t_wt_byte>::type,
wt_tag>::value,
40 "First template argument has to be a wavelet tree.");
41 static_assert(std::is_same<typename index_tag<t_wt_int>::type,
wt_tag>::value,
42 "Second template argument has to be a wavelet tree.");
70 inline std::tuple<bool, value_type, value_type> try_get_char_class_offset(
value_type c)
const
74 return std::make_tuple(
false, 0, 0);
79 return std::make_tuple(
false, 0, 0);
81 return std::make_tuple(
true, offset_class.second, offset_class.first);
96 template <
typename t_it>
100 const uint8_t wt_byte_width = wt_byte_type::alphabet_category::WIDTH;
101 const uint8_t wt_int_width = wt_int_type::alphabet_category::WIDTH;
105 std::vector<std::pair<size_type, value_type>> char_freq;
109 for (
auto it =
begin; it !=
end; ++it)
112 while (element >= max_symbol)
114 char_freq.emplace_back(0, max_symbol);
118 if (char_freq[element].first == 0) { pseudo_entries--; }
119 char_freq[element].first++;
121 std::sort(char_freq.rbegin(), char_freq.rend());
122 m_sigma = max_symbol - pseudo_entries;
128 std::vector<std::pair<std::string, int_vector_buffer<wt_int_width>>> temp_file_offset_buffers;
141 for (; offset < class_size && current_symbol <
m_sigma; ++offset, ++current_symbol)
143 m_char2class_buffer[char_freq[current_symbol].second] = i;
148 temp_file_offset_buffers.emplace_back(temp_file_offset,
167 for (
auto it =
begin; it !=
end; ++it)
176 temp_file_offset_buffers[cl].second.push_back(offset);
179 class_buffer.
close();
193 auto & temp_file_offset_buffer = temp_file_offset_buffers[i];
194 temp_file_offset_buffer.second.close();
224 *
this = std::move(tmp);
239 m_class = std::move(wt.m_class);
264 auto textoffset_class =
m_class.inverse_select(i);
265 auto cl = textoffset_class.second;
286 auto success_class_offset = try_get_char_class_offset(c);
287 if (!std::get<0>(success_class_offset)) {
return 0; }
288 auto cl = std::get<1>(success_class_offset);
289 auto offset = std::get<2>(success_class_offset);
305 auto textoffset_class =
m_class.inverse_select(i);
306 auto textoffset = textoffset_class.first;
307 auto cl = textoffset_class.second;
310 return std::make_pair(class_result.first,
m_char2class.select(class_result.second + 1, cl));
326 assert(1 <= i and i <=
rank(
size(), c));
327 auto success_class_offset = try_get_char_class_offset(c);
328 if (!std::get<0>(success_class_offset)) {
return m_size; }
329 auto cl = std::get<1>(success_class_offset);
330 auto offset = std::get<2>(success_class_offset);
333 return m_class.select(text_offset, cl);
345 written_bytes +=
m_char2class.serialize(out, child,
"char2class");
346 written_bytes +=
m_class.serialize(out, child,
"class");
349 written_bytes +=
m_offset[i].serialize(out, child,
"offset");
352 return written_bytes;
370 template <
typename archive_t>
383 template <
typename archive_t>
403 return (
m_size == other.m_size) && (
m_sigma == other.m_sigma) &&
bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
void close(bool remove_file=false)
Close the int_vector_buffer.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
static mm_event_proxy event(const std::string &name)
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
std::vector< wt_int_type > m_offset
const_iterator end() const
bool operator!=(wt_ap const &other) const noexcept
Inequality operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
bool operator==(wt_ap const &other) const noexcept
Equality operator.
wt_ap(const wt_ap &wt)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
wt_ap & operator=(wt_ap &&wt)
Assignment move operator.
wt_ap(wt_ap &&wt)
Move copy constructor.
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
wt_ap & operator=(const wt_ap &wt)
Assignment operator.
value_type m_singleton_class_cnt
wt_byte_type m_char2class
wt_ap(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
bool empty() const
Returns whether the wavelet tree contains no data.
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
void load(std::istream &in)
Loads the data structure from the given istream.
int_vector ::difference_type difference_type
int_vector ::size_type size_type
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
int_alphabet_tag alphabet_category
wt_ap()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
int_vector ::value_type value_type
random_access_const_iterator< wt_ap > const_iterator
size_type size() const
Returns the size of the original vector.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
int_vector.hpp contains the sdsl::int_vector class.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
uint64_t count(const t_k2_treap &treap, k2_treap_ns::point_type p1, k2_treap_ns::point_type p2)
Count how many points are in the rectangle (p1,p2)
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
int remove(const std::string &)
Remove a file.
void construct_im(t_index &idx, t_data &&data, uint8_t num_bytes=0)
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
wm_int.hpp contains a specialized class for a wavelet tree for sequences over large alphabets.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.