SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
csa_bitcompressed.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_CSA_UNCOMPRESSED
9#define INCLUDED_SDSL_CSA_UNCOMPRESSED
10
11#include <algorithm>
12#include <iomanip>
13#include <iostream>
14#include <iterator>
15#include <string>
16
19#include <sdsl/int_vector.hpp>
20#include <sdsl/iterators.hpp>
23#include <sdsl/util.hpp>
24
25namespace sdsl
26{
27
29
44template <class t_alphabet_strat = byte_alphabet>
46{
48
49 public:
50 typedef uint64_t value_type; // STL Container requirement
52 typedef const_iterator iterator; // STL Container requirement
56 typedef const pointer const_pointer;
57 typedef int_vector<>::size_type size_type; // STL Container requirement
59 typedef ptrdiff_t difference_type; // STL Container requirement
68 typedef t_alphabet_strat alphabet_type;
69 typedef typename alphabet_type::char_type char_type; // Note: This is the char type of the CSA not the WT!
70 typedef typename alphabet_type::comp_char_type comp_char_type;
71 typedef typename alphabet_type::string_type string_type;
72 typedef typename alphabet_type::alphabet_category alphabet_category;
74
77
78 enum
79 {
82 };
83
84 private:
85 sa_sample_type m_sa; // vector for suffix array values
86 isa_sample_type m_isa; // vector for inverse suffix array values
87 alphabet_type m_alphabet;
88
89 public:
90 const typename alphabet_type::char2comp_type & char2comp = m_alphabet.char2comp;
91 const typename alphabet_type::comp2char_type & comp2char = m_alphabet.comp2char;
92 const typename alphabet_type::C_type & C = m_alphabet.C;
93 const typename alphabet_type::sigma_type & sigma = m_alphabet.sigma;
94 const psi_type psi = psi_type(*this);
95 const lf_type lf = lf_type(*this);
96 const bwt_type bwt = bwt_type(*this);
97 const bwt_type L = bwt_type(*this);
98 const isa_type & isa = m_isa;
100 const text_type text = text_type(*this);
101 const sa_sample_type & sa_sample = m_sa;
103
108 : m_sa(csa.m_sa)
109 , m_isa(csa.m_isa)
110 , m_alphabet(csa.m_alphabet)
111 {}
112
114 csa_bitcompressed(csa_bitcompressed && csa) { *this = std::move(csa); }
115
118 {
119 std::string text_file = cache_file_name(key_text<alphabet_type::int_width>(), config);
122 size_type n = text_buf.size();
123
124 m_alphabet = alphabet_type(text_buf, n);
125 m_sa = sa_sample_type(config);
126 m_isa = isa_sample_type(config);
127 }
128
130
133 size_type size() const { return m_sa.size(); }
134
136
140
142
145 bool empty() const { return m_sa.empty(); }
146
148
151 const_iterator begin() const { return const_iterator(this, 0); }
152
154
157 const_iterator end() const { return const_iterator(this, size()); }
158
160
164 inline value_type operator[](size_type i) const { return m_sa[i]; }
165
167
171 {
172 if (this != &csa)
173 {
174 csa_bitcompressed tmp(csa);
175 *this = std::move(tmp);
176 }
177 return *this;
178 }
179
181
185 {
186 if (this != &csa)
187 {
188 m_sa = std::move(csa.m_sa);
189 m_isa = std::move(csa.m_isa);
190 m_alphabet = std::move(csa.m_alphabet);
191 }
192 return *this;
193 }
194
196 bool operator==(csa_bitcompressed const & other) const noexcept
197 {
198 return (m_sa == other.m_sa) && (m_isa == other.m_isa) && (m_alphabet == other.m_alphabet);
199 }
200
202 bool operator!=(csa_bitcompressed const & other) const noexcept { return !(*this == other); }
203
205
208 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
209 {
210 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
211 size_type written_bytes = 0;
212 written_bytes += m_sa.serialize(out, child, "m_sa");
213 written_bytes += m_isa.serialize(out, child, "m_isa");
214 written_bytes += m_alphabet.serialize(out, child, "m_alphabet");
215 structure_tree::add_size(child, written_bytes);
216 return written_bytes;
217 }
218
219 void load(std::istream & in)
220 {
221 m_sa.load(in);
222 m_isa.load(in);
223 m_alphabet.load(in);
224 }
225
226 template <typename archive_t>
227 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
228 {
229 ar(CEREAL_NVP(m_sa));
230 ar(CEREAL_NVP(m_isa));
231 ar(CEREAL_NVP(m_alphabet));
232 }
233
234 template <typename archive_t>
235 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
236 {
237 ar(CEREAL_NVP(m_sa));
238 ar(CEREAL_NVP(m_isa));
239 ar(CEREAL_NVP(m_alphabet));
240 }
241
242 size_type get_sample_dens() const { return 1; }
243
244 private:
245 // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
246 /*
247 * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
248 * \param c The symbol to count the occurrences in the prefix.
249 * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
250 * \par Time complexity
251 * \f$ \Order{\log n} \f$
252 */
253 size_type rank_bwt(size_type i, const char_type c) const
254 {
255 // TODO: special case if c == BWT[i-1] we can use LF to get a constant time answer
257 if (cc == 0 and c != 0) // character is not in the text => return 0
258 return 0;
259 // binary search the interval [C[cc]..C[cc+1]-1] for the result
260 size_type lower_b = C[cc],
261 upper_b = C[((size_type)1) + cc]; // lower_b inclusive, upper_b exclusive
262 while (lower_b + 1 < upper_b)
263 {
264 size_type mid = (lower_b + upper_b) / 2;
265 if (psi[mid] >= i)
266 upper_b = mid;
267 else
268 lower_b = mid;
269 }
270 if (lower_b > C[cc])
271 return lower_b - C[cc] + 1;
272 else
273 { // lower_b == m_C[cc]
274 return psi[lower_b] < i; // 1 if psi[lower_b]<i, 0 otherwise
275 }
276 }
277
278 // Calculates the i-th occurrence of symbol c in the BWT of the original text.
279 /*
280 * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
281 * \param c Character c.
282 * \returns The i-th occurrence of c in the BWT or size() if c does
283 * not occur t times in BWT>
284 * \par Time complexity
285 * \f$ \Order{t_{\Psi}} \f$
286 */
287 size_type select_bwt(size_type i, const char_type c) const
288 {
290 if (cc == 0 and c != 0) // character is not in the text => return size()
291 return size();
292 if (C[cc] + i - 1 < C[((size_type)1) + cc]) { return psi[C[cc] + i - 1]; }
293 return size();
294 }
295};
296
297} // end namespace sdsl
298#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A wrapper for the bwt of a compressed suffix array that is based on the function.
A class for the uncompressed suffix array (SA).
bwt_of_csa_psi< csa_bitcompressed > bwt_type
csa_bitcompressed()
Default constructor.
const alphabet_type::char2comp_type & char2comp
bool operator==(csa_bitcompressed const &other) const noexcept
Equality operator.
traverse_csa_saisa< csa_bitcompressed, false > lf_type
const alphabet_type::C_type & C
const alphabet_type::comp2char_type & comp2char
_isa_sampling< csa_bitcompressed, 0 > isa_sample_type
const sa_sample_type & sa_sample
void load(std::istream &in)
_sa_order_sampling< csa_bitcompressed, 0 > sa_sample_type
bool empty() const
Returns if the data structure is empty.
size_type get_sample_dens() const
value_type operator[](size_type i) const
[]-operator
csa_bitcompressed(const csa_bitcompressed &csa)
Copy constructor.
const alphabet_type::sigma_type & sigma
bool operator!=(csa_bitcompressed const &other) const noexcept
Inequality operator.
csa_bitcompressed & operator=(csa_bitcompressed &&csa)
Assignment Move Operator.
size_type size() const
Number of elements in the instance.
csa_bitcompressed csa_type
alphabet_type::string_type string_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
csa_bitcompressed(csa_bitcompressed &&csa)
Move constructor.
csa_bitcompressed(cache_config &config)
Constructor.
random_access_const_iterator< csa_bitcompressed > const_iterator
traverse_csa_saisa< csa_bitcompressed, true > psi_type
const first_row_type F
text_of_csa< csa_bitcompressed > text_type
const isa_sample_type & isa_sample
alphabet_type::char_type char_type
const_iterator begin() const
Returns a const_iterator to the first element.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
const_iterator end() const
Returns a const_iterator to the element after the last element.
csa_bitcompressed & operator=(const csa_bitcompressed &csa)
Assignment Operator.
const value_type const_reference
static size_type max_size()
Returns the largest size that csa_bitcompressed can ever have.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
alphabet_type::comp_char_type comp_char_type
t_alphabet_strat alphabet_type
first_row_of_csa< csa_bitcompressed > first_row_type
int_vector ::size_type size_type
alphabet_type::alphabet_category alphabet_category
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:542
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A helper class for the function for (compressed) suffix arrays which provide also the inverse suffix...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
csa_sampling_strategy.hpp includes different strategy classes for suffix array sampling in the CSAs.
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_SA[]
Definition: config.hpp:37
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
Contains declarations and definitions of data structure concepts.
Helper class for construction process.
Definition: config.hpp:67
suffix_array_helper.hpp contains some helper classes for CSTs
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.