9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
71template <
class t_csa, u
int8_t t_w
idth = 0>
130template <u
int8_t t_w
idth = 0>
133 template <
class t_csa>
138template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
143 t_rank m_rank_marked;
182 for (
size_type i = 0, sa_cnt = 0; i < n; ++i)
191 m_marked = std::move(t_bv(
marked));
192 util::init_support(m_rank_marked, &m_marked);
198 m_marked = st.m_marked;
199 m_rank_marked = st.m_rank_marked;
200 m_rank_marked.set_vector(&m_marked);
226 m_marked = st.m_marked;
227 m_rank_marked = st.m_rank_marked;
228 m_rank_marked.set_vector(&m_marked);
237 m_marked.swap(st.m_marked);
238 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
246 written_bytes += m_marked.serialize(out, child,
"marked");
247 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
249 return written_bytes;
256 m_rank_marked.load(in);
257 m_rank_marked.set_vector(&m_marked);
260 template <
typename archive_t>
268 template <
typename archive_t>
274 m_rank_marked.set_vector(&m_marked);
278template <
class t_bit_vec = sd_vector<>,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
281 template <
class t_csa>
286template <
class t_csa,
289 class t_rank_sa =
typename t_bv_sa::rank_1_type,
290 class t_select_isa =
typename t_bv_isa::select_1_type>
295 t_rank_sa m_rank_marked_sa;
296 t_bv_isa m_marked_isa;
297 t_select_isa m_select_marked_isa;
349 uint64_t min_prev_val = 0;
353 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
356 if (isa_buf[j] < isa_buf[pos_min])
358 if (isa_buf[j] >= min_prev_val)
364 else if (isa_buf[j] < isa_buf[pos_cnd])
374 min_prev_val = isa_buf[pos_cnd];
376 inv_perm[cnt++] = min_prev_val;
379 m_marked_isa = std::move(t_bv_isa(
marked_isa));
380 util::init_support(m_select_marked_isa, &m_marked_isa);
390 m_marked_sa = std::move(t_bv_sa(
marked_sa));
391 util::init_support(m_rank_marked_sa, &m_marked_sa);
393 std::string tmp_key =
404 m_marked_sa(st.m_marked_sa),
405 m_rank_marked_sa(st.m_rank_marked_sa),
406 m_marked_isa(st.m_marked_isa),
407 m_select_marked_isa(st.m_select_marked_isa),
408 m_inv_perm(st.m_inv_perm)
410 m_rank_marked_sa.set_vector(&m_marked_sa);
411 m_select_marked_isa.set_vector(&m_marked_isa);
416 m_marked_sa(std::move(st.m_marked_sa)),
417 m_rank_marked_sa(std::move(st.m_rank_marked_sa)),
418 m_marked_isa(std::move(st.m_marked_isa)),
419 m_select_marked_isa(std::move(st.m_select_marked_isa)),
420 m_inv_perm(std::move(st.m_inv_perm))
422 m_rank_marked_sa.set_vector(&m_marked_sa);
423 m_select_marked_isa.set_vector(&m_marked_isa);
429 return m_marked_sa[i];
435 return m_select_marked_isa(m_inv_perm.
select(1, m_rank_marked_sa(i)) + 1);
441 return m_inv_perm[i];
446 return m_inv_perm.
size();
455 *
this = std::move(tmp);
463 m_marked_sa = std::move(st.m_marked_sa);
464 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
465 m_marked_isa = std::move(st.m_marked_isa);
466 m_select_marked_isa = std::move(st.m_select_marked_isa);
467 m_inv_perm = std::move(st.m_inv_perm);
468 m_rank_marked_sa.set_vector(&m_marked_sa);
469 m_select_marked_isa.set_vector(&m_marked_isa);
477 written_bytes += m_marked_sa.serialize(out, child,
"marked_sa");
478 written_bytes += m_rank_marked_sa.serialize(out, child,
"rank_marked_sa");
479 written_bytes += m_marked_isa.serialize(out, child,
"marked_isa");
480 written_bytes += m_select_marked_isa.serialize(out, child,
"select_marked_isa");
481 written_bytes += m_inv_perm.
serialize(out, child,
"inv_perm");
483 return written_bytes;
488 m_marked_sa.load(in);
489 m_rank_marked_sa.load(in);
490 m_rank_marked_sa.set_vector(&m_marked_sa);
491 m_marked_isa.load(in);
492 m_select_marked_isa.load(in);
493 m_select_marked_isa.set_vector(&m_marked_isa);
497 template <
typename archive_t>
507 template <
typename archive_t>
512 m_rank_marked_sa.set_vector(&m_marked_sa);
515 m_select_marked_isa.set_vector(&m_marked_isa);
522 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa)
523 && (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa)
524 && (m_inv_perm == other.m_inv_perm);
530 return !(*
this == other);
533template <
class t_bv_sa = sd_vector<>,
534 class t_bv_isa = sd_vector<>,
535 class t_rank_sa =
typename t_bv_sa::rank_1_type,
536 class t_select_isa =
typename t_bv_isa::select_1_type>
539 template <
class t_csa>
568template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
573 t_rank m_rank_marked;
609 typedef typename t_csa::char_type char_type;
610 std::set<char_type> char_map;
613 for (uint64_t i = 0; i < sample_char.
size(); ++i)
615 char_map.insert((char_type)sample_char[i]);
622 char_type bwt = bwt_buf[i];
628 else if (char_map.find(bwt) != char_map.end())
644 m_marked = std::move(marked);
645 util::init_support(m_rank_marked, &m_marked);
651 m_marked = st.m_marked;
652 m_rank_marked = st.m_rank_marked;
653 m_rank_marked.set_vector(&m_marked);
674 m_marked = st.m_marked;
675 m_rank_marked = st.m_rank_marked;
676 m_rank_marked.set_vector(&m_marked);
685 m_marked.swap(st.m_marked);
686 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
694 written_bytes += m_marked.serialize(out, child,
"marked");
695 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
697 return written_bytes;
704 m_rank_marked.load(in);
705 m_rank_marked.set_vector(&m_marked);
708 template <
typename archive_t>
716 template <
typename archive_t>
722 m_rank_marked.set_vector(&m_marked);
726template <
class t_bit_vec = bit_vector,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
729 template <
class t_csa>
734template <
class t_csa, u
int8_t t_w
idth = 0>
741 typedef typename t_csa::sa_sample_type
sa_type;
769 base_type::operator[](i) = 0;
791 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
798 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
807 template <
typename archive_t>
813 template <
typename archive_t>
823template <u
int8_t t_w
idth = 0>
826 template <
class t_csa>
831template <
class t_csa,
class t_inv_perm,
class t_sel>
834 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
835 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
840 typedef typename t_csa::sa_sample_type
sa_type;
849 t_sel m_select_marked;
850 t_inv_perm m_inv_perm;
867 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
870 m_select_marked = t_sel(&(sa_sample->marked));
872 m_inv_perm = t_inv_perm(perm);
873 m_inv_perm.set_vector(perm);
879 m_inv_perm = st.m_inv_perm;
880 m_select_marked = st.m_select_marked;
886 return m_select_marked(m_inv_perm[i /
sample_dens] + 1);
893 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
900 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
908 m_inv_perm = st.m_inv_perm;
909 m_select_marked = st.m_select_marked;
919 m_inv_perm.swap(st.m_inv_perm);
920 m_select_marked.swap(st.m_select_marked);
928 written_bytes += m_inv_perm.serialize(out, child,
"inv_perm");
929 written_bytes += m_select_marked.serialize(out, child,
"select_marked");
931 return written_bytes;
938 m_select_marked.load(in);
942 template <
typename archive_t>
949 template <
typename archive_t>
960 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
966 return !(*
this == other);
971 if (sa_sample ==
nullptr)
973 m_select_marked.set_vector(
nullptr);
974 m_inv_perm.set_vector(
nullptr);
978 m_select_marked.set_vector(&(sa_sample->marked));
984template <
class t_inv_perm = inv_perm_support<8>,
class t_sel =
void>
987 template <
class t_csa>
991 typename std::conditional<std::is_void<t_sel>::value,
992 typename t_csa::sa_sample_type::bv_type::select_1_type,
997template <
class t_csa,
class t_select_sa>
1000 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
1001 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
1014 sa_type const * m_sa_p =
nullptr;
1015 t_select_sa m_select_marked_sa;
1031 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
1043 return m_sa_p->inv(i);
1050 size_type j = m_sa_p->select_marked_isa(ci + 1);
1059 ci = m_sa_p->size() - 1;
1061 j = m_sa_p->select_marked_isa(ci + 1);
1063 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1070 size_type j = m_sa_p->select_marked_isa(ci + 1);
1073 if (ci < m_sa_p->
size() - 1)
1081 j = m_sa_p->select_marked_isa(ci + 1);
1083 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1091 m_select_marked_sa = st.m_select_marked_sa;
1100 m_select_marked_sa.swap(st.m_select_marked_sa);
1107 written_bytes += m_select_marked_sa.serialize(out, child,
"select_marked_sa");
1109 return written_bytes;
1115 m_select_marked_sa.load(in);
1119 template <
typename archive_t>
1125 template <
typename archive_t>
1135 return (m_select_marked_sa == other.m_select_marked_sa);
1141 return !(*
this == other);
1147 if (
nullptr != m_sa_p)
1149 m_select_marked_sa.set_vector(&(sa_sample->marked_sa));
1154template <
class t_select_sa =
void>
1157 template <
class t_csa>
1160 typename std::conditional<std::is_void<t_select_sa>::value,
1161 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
_bwt_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
sa_sampling_tag sampling_category
_bwt_sampling()
Default constructor.
_bwt_sampling & operator=(_bwt_sampling const &st)
Assignment operation.
void load(std::istream &in)
base_type::value_type value_type
_bwt_sampling(_bwt_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
base_type::size_type size_type
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
t_csa::sa_sample_type sa_type
bit_vector::size_type size_type
isa_sampling_tag sampling_category
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support & operator=(_fuzzy_isa_sampling_support const &st)
Assignment operation.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bit_vector::value_type value_type
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
void set_vector(sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support()
Default constructor.
_fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const &st)
Copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
_fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, sa_type const *sa_sample)
Constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bit_vector::value_type value_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bit_vector::size_type size_type
sa_sampling_tag sampling_category
t_bv_isa const & marked_isa
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling const &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_fuzzy_sa_sampling(_fuzzy_sa_sampling const &st)
Copy constructor.
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
t_rank_sa const & rank_marked_sa
t_select_isa const & select_marked_isa
t_bv_sa const & marked_sa
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void load(std::istream &in)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
base_type::size_type size_type
base_type::value_type value_type
void load(std::istream &in, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Load sampling from disk.
isa_sampling_tag sampling_category
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
_isa_sampling(cache_config const &cconfig, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Constructor.
void set_vector(SDSL_UNUSED sa_type const *)
t_csa::sa_sample_type sa_type
int_vector< t_width > base_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_sa_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
sa_sampling_tag sampling_category
base_type::value_type value_type
int_vector< t_width > base_type
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
base_type::size_type size_type
_sa_order_sampling()
Default constructor.
void set_vector(sa_type const *sa_sample=nullptr)
_text_order_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
_text_order_isa_sampling_support & operator=(_text_order_isa_sampling_support const &st)
Assignment operation.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support(_text_order_isa_sampling_support const &st)
Copy constructor.
_text_order_isa_sampling_support()
Default constructor.
isa_sampling_tag sampling_category
t_sel const & select_marked
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void swap(_text_order_isa_sampling_support &st)
Swap operation.
t_csa::sa_sample_type sa_type
bit_vector::size_type size_type
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bit_vector::value_type value_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
value_type condensed_sa(size_type i) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
sa_sampling_tag sampling_category
t_rank const & rank_marked
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
base_type::size_type size_type
_text_order_sampling(_text_order_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_text_order_sampling & operator=(_text_order_sampling const &st)
Assignment operation.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
int_vector< t_width > base_type
base_type::value_type value_type
void load(std::istream &in)
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
int_vector_size_type size_type
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
reference operator[](size_type const &i) noexcept
void swap(int_vector &v) noexcept
void load(std::istream &in)
int_vector & operator=(int_vector const &v)
size_type size() const noexcept
The number of elements in the int_vector.
void resize(const size_type size)
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
int_vector_trait< t_width >::value_type value_type
uint8_t width() const noexcept
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original vector.
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
void load(std::istream &in)
Loads the data structure from the given istream.
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_SAMPLE_CHAR[]
std::string to_string(T const &t, int w=1)
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
Namespace for the succinct data structure library.
bool cache_file_exists(std::string const &key, cache_config const &config)
Checks if the resource specified by the key exists in the cache.
int remove(std::string const &)
Remove a file.
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
constexpr char const * key_bwt()
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
void construct_isa(cache_config &config)
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
int_vector ::size_type size(range_type const &r)
Size of a range.
rank_support_v.hpp contains rank_support_v.
rrr_vector.hpp contains the sdsl::rrr_vector class, and classes which support rank and select for rrr...
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Helper class for construction process.
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_int.hpp contains a specialized class for a wavelet tree of a sequence of the numbers.