SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
suffix_tree_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_SUFFIX_TREE_HELPER
5#define INCLUDED_SDSL_SUFFIX_TREE_HELPER
6
7#include <cassert>
8#include <cstdlib>
9#include <stack>
10#include <stdint.h>
11
12#include <sdsl/iterators.hpp>
15
16namespace sdsl
17{
18
19template <class t_cst>
21{
22 public:
23 using iterator_category = std::forward_iterator_tag;
24 using value_type = typename t_cst::node_type;
25 using difference_type = std::ptrdiff_t;
28
29 using node_type = typename t_cst::node_type;
32
33 private:
34 const t_cst * m_cst;
35 node_type m_cur_node;
36
37 public:
39 : m_cst(nullptr){};
41 : m_cst(cst)
42 , m_cur_node(v)
43 {}
45 : m_cst(it.m_cst)
46 , m_cur_node(it.m_cur_node)
47 {}
48
49 public:
50 const_reference operator*() const { return m_cur_node; }
52 {
53 m_cur_node = m_cst->sibling(m_cur_node);
54 return *this;
55 }
57 {
58 iterator_type it = *this;
59 ++(*this);
60 return it;
61 }
62 bool operator==(const iterator_type & it) const { return it.m_cur_node == m_cur_node; }
63 bool operator!=(const iterator_type & it) const { return !(*this == it); }
64};
65
66template <class t_cst>
68{
69 public: // types
71 using node_type = typename t_cst::node_type;
72 using size_type = typename t_cst::size_type;
73
74 private: // data
75 node_type m_parent;
76 const t_cst * m_cst;
77
78 public: // constructors
80 explicit cst_node_child_proxy(const t_cst * cst, node_type v)
81 : m_parent(v)
82 , m_cst(cst){};
84 : m_parent(p.m_parent)
85 , m_cst(p.m_cst){};
86
87 public: // methods
89 {
90 return m_cst->select_child(m_parent, i + 1);
91 } // enumeration starts with 1 not 0
92 size_type size() { return m_cst->degree(m_parent); }
93 iterator_type begin() const { return iterator_type(m_cst, m_cst->select_child(m_parent, 1)); }
94 iterator_type end() const { return iterator_type(m_cst, m_cst->root()); }
95};
96
98
107template <class t_rac>
108void construct_supercartesian_tree_bp(const t_rac & vec, bit_vector & bp, const bool minimum = true)
109{
110 typedef typename t_rac::size_type size_type;
111 bp.resize(2 * vec.size()); // resize bit vector for balanaced parantheses to 2 n bits
112 util::set_to_value(bp, 0);
113 std::stack<typename t_rac::value_type> vec_stack;
114
115 size_type k = 0;
116 for (size_type i = 0; i < vec.size(); ++i)
117 {
118 typename t_rac::value_type l = vec[i];
119 if (minimum)
120 {
121 while (vec_stack.size() > 0 and l < vec_stack.top())
122 {
123 vec_stack.pop();
124 ++k;
125 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
126 }
127 }
128 else
129 {
130 while (vec_stack.size() > 0 and l > vec_stack.top())
131 {
132 vec_stack.pop();
133 ++k;
134 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
135 }
136 }
137 vec_stack.push(l);
138 bp[k++] = 1; // writing an opening parenthesis
139 }
140 while (vec_stack.size() > 0)
141 {
142 vec_stack.pop();
143 bp[k++] = 0; // writing a closing parenthesis
144 }
145 assert(k == 2 * vec.size());
146}
147
149
158template <class t_rac>
159bit_vector construct_supercartesian_tree_bp_succinct(const t_rac & vec, const bool minimum = true)
160{
161 typedef typename t_rac::size_type size_type;
162 bit_vector bp(2 * vec.size(), 0); // initialize result
163 if (vec.size() > 0)
164 {
165 sorted_stack_support vec_stack(vec.size());
166
167 size_type k = 0;
168 if (minimum)
169 {
170 bp[k++] = 1;
171 for (size_type i = 1; i < vec.size(); ++i)
172 {
173 if (vec[i] < vec[i - 1])
174 {
175 ++k;
176 while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()])
177 {
178 vec_stack.pop();
179 ++k; // writing a closing parenthesis, bp is already initialized to zero
180 }
181 }
182 else
183 {
184 vec_stack.push(i - 1); // "lazy stack" trick: speed-up approx. 25%
185 }
186 bp[k++] = 1; // writing an opening parenthesis
187 }
188 }
189 else
190 {
191 // no "lazy stack" trick used here
192 for (size_type i = 0; i < vec.size(); ++i)
193 {
194 while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()])
195 {
196 vec_stack.pop();
197 ++k;
198 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
199 }
200 vec_stack.push(i);
201 bp[k++] = 1; // writing an opening parenthesis
202 }
203 }
204 }
205 return bp;
206}
207
209
217template <uint8_t t_width>
219{
220 typedef bit_vector::size_type size_type;
221 bit_vector bp(2 * lcp_buf.size(), 0); // initialize result
222 if (lcp_buf.size() > 0)
223 {
224 sorted_multi_stack_support vec_stack(lcp_buf.size());
225
226 size_type k = 0;
227 if (minimum)
228 {
229 bp[k++] = 1;
230 size_type last = lcp_buf[0];
231 for (size_type i = 1, x; i < lcp_buf.size(); ++i)
232 {
233 x = lcp_buf[i];
234 if (x < last)
235 {
236 ++k; // writing a closing parenthesis for last
237 while (!vec_stack.empty() and x < vec_stack.top())
238 {
239 vec_stack.pop();
240 ++k; // writing a closing parenthesis, bp is already initialized to zeros
241 }
242 }
243 else
244 {
245 vec_stack.push(last); // "lazy stack" trick: speed-up about 25 %
246 }
247 bp[k++] = 1; // writing an opening parenthesis
248 last = x;
249 }
250 }
251 else
252 {
253 // no "lazy stack" trick use here
254 for (size_type i = 0, x; i < lcp_buf.size(); ++i)
255 {
256 x = lcp_buf[i];
257 while (!vec_stack.empty() and x > vec_stack.top())
258 {
259 vec_stack.pop();
260 ++k; // writing a closing parenthesis, bp is already initialized to zeros
261 }
262 vec_stack.push(x);
263 bp[k++] = 1; // writing an opening parenthesis
264 }
265 }
266 }
267 return bp;
268}
269
272
281template <uint8_t t_width>
283 bit_vector & bp,
284 bit_vector & bp_fc,
285 const bool minimum = true)
286{
287 typedef bit_vector::size_type size_type;
288 size_type n = lcp_buf.size();
289 bp.resize(2 * n); // resize bit vector for balanced parentheses to 2 n bits
290 bp_fc.resize(n);
291 if (n == 0) // if n == 0 we are done
292 return 0;
293 size_type fc_cnt = 0; // first child counter
294 util::set_to_value(bp, 0);
295 util::set_to_value(bp_fc, 0);
296 sorted_multi_stack_support vec_stack(n);
297
298 size_type k = 0;
299 size_type k_fc = 0; // first child index
300 if (minimum)
301 {
302 // no "lazy stack" trick used here
303 for (size_type i = 0, x; i < n; ++i)
304 {
305 x = lcp_buf[i];
306 while (!vec_stack.empty() and x < vec_stack.top())
307 {
308 if (vec_stack.pop())
309 {
310 bp_fc[k_fc] = 1;
311 ++fc_cnt;
312 }
313 ++k; // writing a closing parenthesis, bp is already initialized to zeros
314 ++k_fc; // write a bit in first_child
315 }
316 vec_stack.push(x);
317 bp[k++] = 1; // writing an opening parenthesis
318 }
319 }
320 else
321 {
322 // no "lazy stack" trick used here
323 for (size_type i = 0, x; i < n; ++i)
324 {
325 x = lcp_buf[i];
326 while (!vec_stack.empty() and x > vec_stack.top())
327 {
328 if (vec_stack.pop())
329 {
330 bp_fc[k_fc] = 1;
331 ++fc_cnt;
332 }
333 ++k; // writing a closing parenthesis, bp is already initialized to zeros
334 ++k_fc; // write a bit in first_child
335 }
336 vec_stack.push(x);
337 bp[k++] = 1; // writing an opening parenthesis
338 }
339 }
340 while (!vec_stack.empty())
341 {
342 if (vec_stack.pop())
343 {
344 bp_fc[k_fc] = 1;
345 ++fc_cnt;
346 }
347 // writing a closing parenthesis in bp, not necessary as bp is initialized with zeros
348 ++k;
349 ++k_fc;
350 }
351 return fc_cnt;
352}
353
354// Gets ISA[SA[idx]+d]
355// d = depth of the character 0 = first position
356template <class t_csa>
357typename t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa & csa)
358{
359 if (d == 0) return idx;
360 // if we have to apply \f$\LF\f$ or \f$\Phi\f$ more
361 // than 2*d times to calc csa(csa[idx]+d), we opt to
362 // apply \f$ \Phi \f$ d times
363 if (csa.sa_sample_dens + csa.isa_sample_dens > 2 * d + 2)
364 {
365 for (typename t_csa::size_type i = 0; i < d; ++i) idx = csa.psi[idx];
366 return idx;
367 }
368 return csa.isa[csa[idx] + d];
369}
370
371// has_id<X>::value is true if class X has
372// implement method id
373// Adapted solution from jrok's proposal:
374// http://stackoverflow.com/questions/87372/check-if-a-class-has-a-member-function-of-a-given-signature
375template <typename t_wt>
376struct has_id
377{
378 template <typename T>
379 static constexpr auto
380 check(T *) -> typename std::is_same<decltype(std::declval<T>().id(std::declval<typename T::node_type &>())),
381 typename T::size_type>::type
382 {
383 return std::true_type();
384 }
385 template <typename>
386 static constexpr std::false_type check(...)
387 {
388 return std::false_type();
389 }
390 typedef decltype(check<t_wt>(nullptr)) type;
391 static constexpr bool value = type::value;
392};
393} // namespace sdsl
394
395#endif
std::forward_iterator_tag iterator_category
bool operator!=(const iterator_type &it) const
typename t_cst::node_type value_type
bool operator==(const iterator_type &it) const
cst_node_child_proxy_iterator(const t_cst *cst, node_type v)
cst_node_child_proxy_iterator(const iterator_type &it)
typename t_cst::size_type size_type
typename t_cst::node_type node_type
iterator_type begin() const
cst_node_child_proxy(const cst_node_child_proxy &p)
cst_node_child_proxy(const t_cst *cst, node_type v)
cst_node_child_proxy_iterator< t_cst > iterator_type
node_type operator[](size_type i) const
uint64_t size() const
Returns the number of elements currently stored.
int_vector_size_type size_type
Definition: int_vector.hpp:229
size_type size() const noexcept
The number of elements in the int_vector.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
Stack which contains elements from [0..n] in sorted order. Duplicates are possible.
bool empty() const
Returns if the stack is empty.
size_type top() const
Returns the topmost index on the stack.
bool pop()
Pop the topmost index of the stack.
bool push(size_type x)
Push the index x of vector vec onto the stack.
A stack which contains strictly increasing numbers in the range from to .
size_type top() const
Returns the topmost index on the stack.
void pop()
Pop the topmost index of the stack.
void push(size_type x)
Push the index x of vector vec onto the stack.
size_type size() const
Returns the number of element is the stack.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:563
Namespace for the succinct data structure library.
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa &csa)
bit_vector construct_supercartesian_tree_bp_succinct(const t_rac &vec, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer< t_width > &lcp_buf, bit_vector &bp, bit_vector &bp_fc, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
void construct_supercartesian_tree_bp(const t_rac &vec, bit_vector &bp, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
sorted_multi_stack_support.hpp contains a data structure for a stack which contains elements from [0....
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().id(std::declval< typename T::node_type & >())), typename T::size_type >::type
static constexpr bool value
decltype(check< t_wt >(nullptr)) type