9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
71template <
class t_csa, u
int8_t t_w
idth = 0>
130template <u
int8_t t_w
idth = 0>
133 template <
class t_csa>
138template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
143 t_rank m_rank_marked;
182 for (
size_type i = 0, sa_cnt = 0; i < n; ++i)
191 m_marked = std::move(t_bv(
marked));
192 util::init_support(m_rank_marked, &m_marked);
198 m_marked = st.m_marked;
199 m_rank_marked = st.m_rank_marked;
200 m_rank_marked.set_vector(&m_marked);
226 m_marked = st.m_marked;
227 m_rank_marked = st.m_rank_marked;
228 m_rank_marked.set_vector(&m_marked);
237 m_marked.swap(st.m_marked);
238 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
246 written_bytes += m_marked.serialize(out, child,
"marked");
247 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
249 return written_bytes;
256 m_rank_marked.load(in);
257 m_rank_marked.set_vector(&m_marked);
260 template <
typename archive_t>
268 template <
typename archive_t>
274 m_rank_marked.set_vector(&m_marked);
278template <
class t_bit_vec = sd_vector<>,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
281 template <
class t_csa>
286template <
class t_csa,
289 class t_rank_sa =
typename t_bv_sa::rank_1_type,
290 class t_select_isa =
typename t_bv_isa::select_1_type>
295 t_rank_sa m_rank_marked_sa;
296 t_bv_isa m_marked_isa;
297 t_select_isa m_select_marked_isa;
350 uint64_t min_prev_val = 0;
354 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
357 if (isa_buf[j] < isa_buf[pos_min])
359 if (isa_buf[j] >= min_prev_val)
365 else if (isa_buf[j] < isa_buf[pos_cnd])
376 min_prev_val = isa_buf[pos_cnd];
378 inv_perm[cnt++] = min_prev_val;
381 m_marked_isa = std::move(t_bv_isa(
marked_isa));
382 util::init_support(m_select_marked_isa, &m_marked_isa);
392 m_marked_sa = std::move(t_bv_sa(
marked_sa));
393 util::init_support(m_rank_marked_sa, &m_marked_sa);
395 std::string tmp_key =
406 m_marked_sa(st.m_marked_sa),
407 m_rank_marked_sa(st.m_rank_marked_sa),
408 m_marked_isa(st.m_marked_isa),
409 m_select_marked_isa(st.m_select_marked_isa),
410 m_inv_perm(st.m_inv_perm)
412 m_rank_marked_sa.set_vector(&m_marked_sa);
413 m_select_marked_isa.set_vector(&m_marked_isa);
418 m_marked_sa(std::move(st.m_marked_sa)),
419 m_rank_marked_sa(std::move(st.m_rank_marked_sa)),
420 m_marked_isa(std::move(st.m_marked_isa)),
421 m_select_marked_isa(std::move(st.m_select_marked_isa)),
422 m_inv_perm(std::move(st.m_inv_perm))
424 m_rank_marked_sa.set_vector(&m_marked_sa);
425 m_select_marked_isa.set_vector(&m_marked_isa);
431 return m_marked_sa[i];
437 return m_select_marked_isa(m_inv_perm.
select(1, m_rank_marked_sa(i)) + 1);
443 return m_inv_perm[i];
448 return m_inv_perm.
size();
457 *
this = std::move(tmp);
465 m_marked_sa = std::move(st.m_marked_sa);
466 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
467 m_marked_isa = std::move(st.m_marked_isa);
468 m_select_marked_isa = std::move(st.m_select_marked_isa);
469 m_inv_perm = std::move(st.m_inv_perm);
470 m_rank_marked_sa.set_vector(&m_marked_sa);
471 m_select_marked_isa.set_vector(&m_marked_isa);
479 written_bytes += m_marked_sa.serialize(out, child,
"marked_sa");
480 written_bytes += m_rank_marked_sa.serialize(out, child,
"rank_marked_sa");
481 written_bytes += m_marked_isa.serialize(out, child,
"marked_isa");
482 written_bytes += m_select_marked_isa.serialize(out, child,
"select_marked_isa");
483 written_bytes += m_inv_perm.
serialize(out, child,
"inv_perm");
485 return written_bytes;
490 m_marked_sa.load(in);
491 m_rank_marked_sa.load(in);
492 m_rank_marked_sa.set_vector(&m_marked_sa);
493 m_marked_isa.load(in);
494 m_select_marked_isa.load(in);
495 m_select_marked_isa.set_vector(&m_marked_isa);
499 template <
typename archive_t>
509 template <
typename archive_t>
514 m_rank_marked_sa.set_vector(&m_marked_sa);
517 m_select_marked_isa.set_vector(&m_marked_isa);
524 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa)
525 && (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa)
526 && (m_inv_perm == other.m_inv_perm);
532 return !(*
this == other);
535template <
class t_bv_sa = sd_vector<>,
536 class t_bv_isa = sd_vector<>,
537 class t_rank_sa =
typename t_bv_sa::rank_1_type,
538 class t_select_isa =
typename t_bv_isa::select_1_type>
541 template <
class t_csa>
570template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
575 t_rank m_rank_marked;
611 typedef typename t_csa::char_type char_type;
612 std::set<char_type> char_map;
615 for (uint64_t i = 0; i < sample_char.
size(); ++i)
617 char_map.
insert((char_type)sample_char[i]);
624 char_type bwt = bwt_buf[i];
630 else if (char_map.find(bwt) != char_map.end())
646 m_marked = std::move(marked);
647 util::init_support(m_rank_marked, &m_marked);
653 m_marked = st.m_marked;
654 m_rank_marked = st.m_rank_marked;
655 m_rank_marked.set_vector(&m_marked);
676 m_marked = st.m_marked;
677 m_rank_marked = st.m_rank_marked;
678 m_rank_marked.set_vector(&m_marked);
687 m_marked.swap(st.m_marked);
688 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
696 written_bytes += m_marked.serialize(out, child,
"marked");
697 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
699 return written_bytes;
706 m_rank_marked.load(in);
707 m_rank_marked.set_vector(&m_marked);
710 template <
typename archive_t>
718 template <
typename archive_t>
724 m_rank_marked.set_vector(&m_marked);
728template <
class t_bit_vec = bit_vector,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
731 template <
class t_csa>
736template <
class t_csa, u
int8_t t_w
idth = 0>
743 typedef typename t_csa::sa_sample_type
sa_type;
771 base_type::operator[](i) = 0;
793 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
800 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
809 template <
typename archive_t>
815 template <
typename archive_t>
825template <u
int8_t t_w
idth = 0>
828 template <
class t_csa>
833template <
class t_csa,
class t_inv_perm,
class t_sel>
836 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
837 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
842 typedef typename t_csa::sa_sample_type
sa_type;
851 t_sel m_select_marked;
852 t_inv_perm m_inv_perm;
869 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
872 m_select_marked = t_sel(&(sa_sample->marked));
874 m_inv_perm = t_inv_perm(perm);
875 m_inv_perm.set_vector(perm);
881 m_inv_perm = st.m_inv_perm;
882 m_select_marked = st.m_select_marked;
888 return m_select_marked(m_inv_perm[i /
sample_dens] + 1);
895 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
902 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
910 m_inv_perm = st.m_inv_perm;
911 m_select_marked = st.m_select_marked;
921 m_inv_perm.swap(st.m_inv_perm);
922 m_select_marked.swap(st.m_select_marked);
930 written_bytes += m_inv_perm.serialize(out, child,
"inv_perm");
931 written_bytes += m_select_marked.serialize(out, child,
"select_marked");
933 return written_bytes;
940 m_select_marked.load(in);
944 template <
typename archive_t>
951 template <
typename archive_t>
962 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
968 return !(*
this == other);
973 if (sa_sample ==
nullptr)
975 m_select_marked.set_vector(
nullptr);
976 m_inv_perm.set_vector(
nullptr);
980 m_select_marked.set_vector(&(sa_sample->marked));
986template <
class t_inv_perm = inv_perm_support<8>,
class t_sel =
void>
989 template <
class t_csa>
993 typename std::conditional<std::is_void<t_sel>::value,
994 typename t_csa::sa_sample_type::bv_type::select_1_type,
999template <
class t_csa,
class t_select_sa>
1002 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
1003 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
1016 sa_type const * m_sa_p =
nullptr;
1017 t_select_sa m_select_marked_sa;
1033 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
1045 return m_sa_p->inv(i);
1052 size_type j = m_sa_p->select_marked_isa(ci + 1);
1061 ci = m_sa_p->size() - 1;
1063 j = m_sa_p->select_marked_isa(ci + 1);
1065 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1072 size_type j = m_sa_p->select_marked_isa(ci + 1);
1075 if (ci < m_sa_p->
size() - 1)
1083 j = m_sa_p->select_marked_isa(ci + 1);
1085 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1093 m_select_marked_sa = st.m_select_marked_sa;
1102 m_select_marked_sa.swap(st.m_select_marked_sa);
1109 written_bytes += m_select_marked_sa.serialize(out, child,
"select_marked_sa");
1111 return written_bytes;
1117 m_select_marked_sa.load(in);
1121 template <
typename archive_t>
1127 template <
typename archive_t>
1137 return (m_select_marked_sa == other.m_select_marked_sa);
1143 return !(*
this == other);
1149 if (
nullptr != m_sa_p)
1151 m_select_marked_sa.set_vector(&(sa_sample->marked_sa));
1156template <
class t_select_sa =
void>
1159 template <
class t_csa>
1162 typename std::conditional<std::is_void<t_select_sa>::value,
1163 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
_bwt_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_bwt_sampling()
Default constructor.
base_type::value_type value_type
_bwt_sampling & operator=(_bwt_sampling const &st)
Assignment operation.
void load(std::istream &in)
_bwt_sampling(_bwt_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
int_vector< t_width > base_type
base_type::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
sa_sampling_tag sampling_category
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
bit_vector::value_type value_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support & operator=(_fuzzy_isa_sampling_support const &st)
Assignment operation.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
void set_vector(sa_type const *sa_sample=nullptr)
bit_vector::size_type size_type
t_csa::sa_sample_type sa_type
_fuzzy_isa_sampling_support()
Default constructor.
_fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const &st)
Copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
isa_sampling_tag sampling_category
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
_fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, sa_type const *sa_sample)
Constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
sa_sampling_tag sampling_category
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bit_vector::value_type value_type
t_bv_isa const & marked_isa
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling const &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_fuzzy_sa_sampling(_fuzzy_sa_sampling const &st)
Copy constructor.
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
t_rank_sa const & rank_marked_sa
bit_vector::size_type size_type
t_select_isa const & select_marked_isa
t_bv_sa const & marked_sa
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void load(std::istream &in)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
void load(std::istream &in, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Load sampling from disk.
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
isa_sampling_tag sampling_category
_isa_sampling(cache_config const &cconfig, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Constructor.
base_type::value_type value_type
int_vector< t_width > base_type
void set_vector(SDSL_UNUSED sa_type const *)
base_type::size_type size_type
t_csa::sa_sample_type sa_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_sa_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
base_type::size_type size_type
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_sa_order_sampling()
Default constructor.
sa_sampling_tag sampling_category
base_type::value_type value_type
void set_vector(sa_type const *sa_sample=nullptr)
_text_order_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
isa_sampling_tag sampling_category
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
_text_order_isa_sampling_support & operator=(_text_order_isa_sampling_support const &st)
Assignment operation.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support(_text_order_isa_sampling_support const &st)
Copy constructor.
_text_order_isa_sampling_support()
Default constructor.
t_sel const & select_marked
bit_vector::size_type size_type
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void swap(_text_order_isa_sampling_support &st)
Swap operation.
bit_vector::value_type value_type
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
t_csa::sa_sample_type sa_type
value_type condensed_sa(size_type i) const
sa_sampling_tag sampling_category
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
int_vector< t_width > base_type
t_rank const & rank_marked
base_type::size_type size_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_text_order_sampling(_text_order_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_text_order_sampling & operator=(_text_order_sampling const &st)
Assignment operation.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void load(std::istream &in)
base_type::value_type value_type
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal if archive is not binary.
void swap(int_vector &v) noexcept
Swap method for int_vector.
int_vector_size_type size_type
reference operator[](size_type const &i) noexcept
non const version of [] operator
void load(std::istream &in)
Load the int_vector for a stream.
int_vector & operator=(int_vector const &v)
Assignment operator.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type size() const noexcept
The number of elements in the int_vector.
iterator insert(const_iterator it, value_type value)
Insert an element before the element that the iterator is pointing to.
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal if archive is not binary.
int_vector_trait< t_width >::value_type value_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original vector.
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
void load(std::istream &in)
Loads the data structure from the given istream.
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_SAMPLE_CHAR[]
std::string to_string(T const &t, int w=1)
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
Namespace for the succinct data structure library.
bool cache_file_exists(std::string const &key, cache_config const &config)
Checks if the resource specified by the key exists in the cache.
int remove(std::string const &)
Remove a file.
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
void construct_isa(cache_config &config)
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
int_vector ::size_type size(range_type const &r)
Size of a range.
rank_support_v.hpp contains rank_support_v.
rrr_vector.hpp contains the sdsl::rrr_vector class, and classes which support rank and select for rrr...
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Helper class for construction process.
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_int.hpp contains a specialized class for a wavelet tree of a sequence of the numbers.