9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
51template <
class t_csa, u
int8_t t_w
idth = 0>
103template <u
int8_t t_w
idth = 0>
106 template <
class t_csa>
111template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
116 t_rank m_rank_marked;
154 for (
size_type i = 0, sa_cnt = 0; i < n; ++i)
163 m_marked = std::move(t_bv(
marked));
164 util::init_support(m_rank_marked, &m_marked);
171 m_marked = st.m_marked;
172 m_rank_marked = st.m_rank_marked;
173 m_rank_marked.set_vector(&m_marked);
190 m_marked = st.m_marked;
191 m_rank_marked = st.m_rank_marked;
192 m_rank_marked.set_vector(&m_marked);
201 m_marked.swap(st.m_marked);
202 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
210 written_bytes += m_marked.serialize(out, child,
"marked");
211 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
213 return written_bytes;
220 m_rank_marked.load(in);
221 m_rank_marked.set_vector(&m_marked);
224 template <
typename archive_t>
232 template <
typename archive_t>
238 m_rank_marked.set_vector(&m_marked);
242template <
class t_bit_vec = sd_vector<>,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
245 template <
class t_csa>
250template <
class t_csa,
253 class t_rank_sa =
typename t_bv_sa::rank_1_type,
254 class t_select_isa =
typename t_bv_isa::select_1_type>
259 t_rank_sa m_rank_marked_sa;
260 t_bv_isa m_marked_isa;
261 t_select_isa m_select_marked_isa;
313 uint64_t min_prev_val = 0;
317 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
320 if (isa_buf[j] < isa_buf[pos_min]) pos_min = j;
321 if (isa_buf[j] >= min_prev_val)
323 if (pos_cnd == n) { pos_cnd = j; }
324 else if (isa_buf[j] < isa_buf[pos_cnd])
335 min_prev_val = isa_buf[pos_cnd];
337 inv_perm[cnt++] = min_prev_val;
340 m_marked_isa = std::move(t_bv_isa(
marked_isa));
341 util::init_support(m_select_marked_isa, &m_marked_isa);
348 m_marked_sa = std::move(t_bv_sa(
marked_sa));
349 util::init_support(m_rank_marked_sa, &m_marked_sa);
362 : m_marked_sa(st.m_marked_sa)
363 , m_rank_marked_sa(st.m_rank_marked_sa)
364 , m_marked_isa(st.m_marked_isa)
365 , m_select_marked_isa(st.m_select_marked_isa)
366 , m_inv_perm(st.m_inv_perm)
368 m_rank_marked_sa.set_vector(&m_marked_sa);
369 m_select_marked_isa.set_vector(&m_marked_isa);
374 : m_marked_sa(std::move(st.m_marked_sa))
375 , m_rank_marked_sa(std::move(st.m_rank_marked_sa))
376 , m_marked_isa(std::move(st.m_marked_isa))
377 , m_select_marked_isa(std::move(st.m_select_marked_isa))
378 , m_inv_perm(std::move(st.m_inv_perm))
380 m_rank_marked_sa.set_vector(&m_marked_sa);
381 m_select_marked_isa.set_vector(&m_marked_isa);
390 return m_select_marked_isa(m_inv_perm.
select(1, m_rank_marked_sa(i)) + 1);
404 *
this = std::move(tmp);
412 m_marked_sa = std::move(st.m_marked_sa);
413 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
414 m_marked_isa = std::move(st.m_marked_isa);
415 m_select_marked_isa = std::move(st.m_select_marked_isa);
416 m_inv_perm = std::move(st.m_inv_perm);
417 m_rank_marked_sa.set_vector(&m_marked_sa);
418 m_select_marked_isa.set_vector(&m_marked_isa);
426 written_bytes += m_marked_sa.serialize(out, child,
"marked_sa");
427 written_bytes += m_rank_marked_sa.serialize(out, child,
"rank_marked_sa");
428 written_bytes += m_marked_isa.serialize(out, child,
"marked_isa");
429 written_bytes += m_select_marked_isa.serialize(out, child,
"select_marked_isa");
430 written_bytes += m_inv_perm.
serialize(out, child,
"inv_perm");
432 return written_bytes;
437 m_marked_sa.load(in);
438 m_rank_marked_sa.load(in);
439 m_rank_marked_sa.set_vector(&m_marked_sa);
440 m_marked_isa.load(in);
441 m_select_marked_isa.load(in);
442 m_select_marked_isa.set_vector(&m_marked_isa);
446 template <
typename archive_t>
456 template <
typename archive_t>
461 m_rank_marked_sa.set_vector(&m_marked_sa);
464 m_select_marked_isa.set_vector(&m_marked_isa);
471 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa) &&
472 (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa) &&
473 (m_inv_perm == other.m_inv_perm);
479template <
class t_bv_sa = sd_vector<>,
480 class t_bv_isa = sd_vector<>,
481 class t_rank_sa =
typename t_bv_sa::rank_1_type,
482 class t_select_isa =
typename t_bv_isa::select_1_type>
485 template <
class t_csa>
514template <
class t_csa,
class t_bv = bit_vector,
class t_rank =
typename t_bv::rank_1_type, u
int8_t t_w
idth = 0>
519 t_rank m_rank_marked;
549 key_bwt<t_csa::alphabet_type::int_width>(), cconfig));
554 typedef typename t_csa::char_type char_type;
555 std::set<char_type> char_map;
558 for (uint64_t i = 0; i < sample_char.
size(); ++i) { char_map.
insert((char_type)sample_char[i]); }
564 char_type bwt = bwt_buf[i];
570 else if (char_map.find(bwt) != char_map.end())
583 m_marked = std::move(marked);
584 util::init_support(m_rank_marked, &m_marked);
591 m_marked = st.m_marked;
592 m_rank_marked = st.m_rank_marked;
593 m_rank_marked.set_vector(&m_marked);
608 m_marked = st.m_marked;
609 m_rank_marked = st.m_rank_marked;
610 m_rank_marked.set_vector(&m_marked);
619 m_marked.swap(st.m_marked);
620 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
628 written_bytes += m_marked.serialize(out, child,
"marked");
629 written_bytes += m_rank_marked.serialize(out, child,
"rank_marked");
631 return written_bytes;
638 m_rank_marked.load(in);
639 m_rank_marked.set_vector(&m_marked);
642 template <
typename archive_t>
650 template <
typename archive_t>
656 m_rank_marked.set_vector(&m_marked);
660template <
class t_bit_vec = bit_vector,
class t_rank_sup =
typename t_bit_vec::rank_1_type, u
int8_t t_w
idth = 0>
663 template <
class t_csa>
668template <
class t_csa, u
int8_t t_w
idth = 0>
675 typedef typename t_csa::sa_sample_type
sa_type;
701 for (
size_type i = 0; i < this->
size(); ++i) base_type::operator[](i) = 0;
717 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
724 return std::make_tuple(base_type::operator[](ci), ci *
sample_dens);
730 template <
typename archive_t>
736 template <
typename archive_t>
745template <u
int8_t t_w
idth = 0>
748 template <
class t_csa>
753template <
class t_csa,
class t_inv_perm,
class t_sel>
756 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
757 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
762 typedef typename t_csa::sa_sample_type
sa_type;
771 t_sel m_select_marked;
772 t_inv_perm m_inv_perm;
788 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
791 m_select_marked = t_sel(&(sa_sample->marked));
793 m_inv_perm = t_inv_perm(perm);
794 m_inv_perm.set_vector(perm);
800 m_inv_perm = st.m_inv_perm;
801 m_select_marked = st.m_select_marked;
811 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
818 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci *
sample_dens);
826 m_inv_perm = st.m_inv_perm;
827 m_select_marked = st.m_select_marked;
837 m_inv_perm.swap(st.m_inv_perm);
838 m_select_marked.swap(st.m_select_marked);
846 written_bytes += m_inv_perm.serialize(out, child,
"inv_perm");
847 written_bytes += m_select_marked.serialize(out, child,
"select_marked");
849 return written_bytes;
856 m_select_marked.load(in);
860 template <
typename archive_t>
867 template <
typename archive_t>
878 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
886 if (sa_sample ==
nullptr)
888 m_select_marked.set_vector(
nullptr);
889 m_inv_perm.set_vector(
nullptr);
893 m_select_marked.set_vector(&(sa_sample->marked));
899template <
class t_inv_perm = inv_perm_support<8>,
class t_sel =
void>
902 template <
class t_csa>
905 typename std::conditional<std::is_void<t_sel>::value,
906 typename t_csa::sa_sample_type::bv_type::select_1_type,
911template <
class t_csa,
class t_select_sa>
914 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
915 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
920 typedef typename t_csa::sa_sample_type
sa_type;
928 const sa_type * m_sa_p =
nullptr;
929 t_select_sa m_select_marked_sa;
945 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
950 : m_select_marked_sa(st.m_select_marked_sa)
962 size_type j = m_sa_p->select_marked_isa(ci + 1);
965 if (ci > 0) { ci = ci - 1; }
968 ci = m_sa_p->size() - 1;
970 j = m_sa_p->select_marked_isa(ci + 1);
972 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
979 size_type j = m_sa_p->select_marked_isa(ci + 1);
982 if (ci < m_sa_p->
size() - 1) { ci = ci + 1; }
987 j = m_sa_p->select_marked_isa(ci + 1);
989 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
997 m_select_marked_sa = st.m_select_marked_sa;
1010 written_bytes += m_select_marked_sa.serialize(out, child,
"select_marked_sa");
1012 return written_bytes;
1018 m_select_marked_sa.load(in);
1022 template <
typename archive_t>
1028 template <
typename archive_t>
1038 return (m_select_marked_sa == other.m_select_marked_sa);
1047 if (
nullptr != m_sa_p) { m_select_marked_sa.set_vector(&(sa_sample->marked_sa)); }
1051template <
class t_select_sa =
void>
1054 template <
class t_csa>
1056 typename std::conditional<std::is_void<t_select_sa>::value,
1057 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_bwt_sampling()
Default constructor.
base_type::value_type value_type
_bwt_sampling & operator=(const _bwt_sampling &st)
Assignment operation.
void load(std::istream &in)
_bwt_sampling(const _bwt_sampling &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_bwt_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
int_vector< t_width > base_type
base_type::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
sa_sampling_tag sampling_category
_fuzzy_isa_sampling_support & operator=(const _fuzzy_isa_sampling_support &st)
Assignment operation.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
_fuzzy_isa_sampling_support(SDSL_UNUSED const cache_config &cconfig, const sa_type *sa_sample)
Constructor.
void load(std::istream &in, const sa_type *sa_sample=nullptr)
Load sampling from disk.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, const sa_type *sa_sample=nullptr)
_fuzzy_isa_sampling_support(const _fuzzy_isa_sampling_support &st)
Copy constructor.
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
bit_vector::value_type value_type
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
bit_vector::size_type size_type
t_csa::sa_sample_type sa_type
_fuzzy_isa_sampling_support()
Default constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
isa_sampling_tag sampling_category
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void set_vector(const sa_type *sa_sample=nullptr)
_fuzzy_sa_sampling(const _fuzzy_sa_sampling &st)
Copy constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
sa_sampling_tag sampling_category
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
const t_select_isa & select_marked_isa
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
const t_rank_sa & rank_marked_sa
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bit_vector::value_type value_type
_fuzzy_sa_sampling & operator=(const _fuzzy_sa_sampling &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
const t_bv_sa & marked_sa
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
const t_bv_isa & marked_isa
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
bit_vector::size_type size_type
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void load(std::istream &in)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
isa_sampling_tag sampling_category
base_type::value_type value_type
_isa_sampling(const cache_config &cconfig, SDSL_UNUSED const sa_type *sa_sample=nullptr)
Constructor.
int_vector< t_width > base_type
void set_vector(SDSL_UNUSED const sa_type *)
base_type::size_type size_type
t_csa::sa_sample_type sa_type
void load(std::istream &in, SDSL_UNUSED const sa_type *sa_sample=nullptr)
Load sampling from disk.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
base_type::size_type size_type
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_sa_order_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
_sa_order_sampling()
Default constructor.
sa_sampling_tag sampling_category
base_type::value_type value_type
isa_sampling_tag sampling_category
_text_order_isa_sampling_support(const _text_order_isa_sampling_support &st)
Copy constructor.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support & operator=(const _text_order_isa_sampling_support &st)
Assignment operation.
_text_order_isa_sampling_support()
Default constructor.
void load(std::istream &in, const sa_type *sa_sample=nullptr)
Load sampling from disk.
bit_vector::size_type size_type
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
_text_order_isa_sampling_support(SDSL_UNUSED const cache_config &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, const sa_type *sa_sample=nullptr)
const t_sel & select_marked
void swap(_text_order_isa_sampling_support &st)
Swap operation.
void set_vector(const sa_type *sa_sample=nullptr)
bit_vector::value_type value_type
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
t_csa::sa_sample_type sa_type
value_type condensed_sa(size_type i) const
sa_sampling_tag sampling_category
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
int_vector< t_width > base_type
_text_order_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
_text_order_sampling(const _text_order_sampling &st)
Copy constructor.
base_type::size_type size_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
const t_rank & rank_marked
_text_order_sampling & operator=(const _text_order_sampling &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void load(std::istream &in)
base_type::value_type value_type
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
reference operator[](const size_type &i) noexcept
non const version of [] operator
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal if archive is not binary.
void swap(int_vector &v) noexcept
Swap method for int_vector.
int_vector_size_type size_type
int_vector & operator=(const int_vector &v)
Assignment operator.
void load(std::istream &in)
Load the int_vector for a stream.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type size() const noexcept
The number of elements in the int_vector.
iterator insert(const_iterator it, value_type value)
Insert an element before the element that the iterator is pointing to.
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal if archive is not binary.
int_vector_trait< t_width >::value_type value_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
static mm_event_proxy event(const std::string &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original vector.
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
void load(std::istream &in)
Loads the data structure from the given istream.
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
constexpr char KEY_SAMPLE_CHAR[]
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
bool load_from_cache(T &v, const std::string &key, const cache_config &config, bool add_type_hash=false)
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
void construct_isa(cache_config &config)
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
bool cache_file_exists(const std::string &key, const cache_config &config)
Checks if the resource specified by the key exists in the cache.
int remove(const std::string &)
Remove a file.
int_vector ::size_type size(const range_type &r)
Size of a range.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Helper class for construction process.
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
wavelet_trees.hpp contains wavelet tree implementations.