SDSL 3.0.2
Succinct Data Structure Library
Loading...
Searching...
No Matches
csa_sampling_strategy.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
11
12/*
13 * Text = ABCDEFABCDEF$
14 * 0123456789012
15 * sa_sample_dens = 2
16 * *1 SA *2
17 * * 12 * $
18 * 06 * ABCDEF$
19 * * 00 * ABCDEFABCDEF$
20 * 07 BCDEF$
21 * * 01 BCDEFABCDEF$
22 * 08 * CDEF$
23 * * 02 * CDEFABCDEF$
24 * 09 DEF$
25 * * 03 DEFABCDEF$
26 * 10 * EF$
27 * * 04 * EFABCDEF$
28 * 11 F$
29 * * 05 FABCDEF$
30 *
31 * The first sampling (*1) is called suffix order sampling. It has the advantage, that
32 * we don't need to store a bitvector, which marks the sampled suffixes, since a suffix
33 * at index \‍(i\‍) in the suffix array is marked if \‍( 0 \equiv i \mod sa_sample_dens \‍).
34 *
35 * The second sampling (*2) is called text order sampling. It is also called regular in [1].
36 *
37 * [1] P.Ferragina, J. Siren, R. Venturini: Distribution-Aware Compressed Full-Text Indexes, ESA 2011
38 */
39
40#include <iosfwd>
41#include <set>
42#include <stdint.h>
43#include <string>
44#include <tuple>
45#include <type_traits>
46#include <utility>
47
48#include <sdsl/bits.hpp>
49#include <sdsl/cereal.hpp>
50#include <sdsl/config.hpp>
51#include <sdsl/construct.hpp>
54#include <sdsl/int_vector.hpp>
57#include <sdsl/io.hpp>
59#include <sdsl/ram_fs.hpp>
61#include <sdsl/rrr_vector.hpp>
62#include <sdsl/sd_vector.hpp>
65#include <sdsl/util.hpp>
66#include <sdsl/wt_int.hpp>
67
68namespace sdsl
69{
70
71template <class t_csa, uint8_t t_width = 0>
72class _sa_order_sampling : public int_vector<t_width>
73{
74public:
76 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
77 typedef typename base_type::value_type value_type; //
78 enum
79 {
80 sample_dens = t_csa::sa_sample_dens
81 };
82 enum
83 {
84 text_order = false
85 };
87
91
93 /*
94 * \param cconfig Cache configuration (SA is expected to be cached.).
95 * \param csa Pointer to the corresponding CSA. Not used in this class.
96 * \par Time complexity
97 * Linear in the size of the suffix array.
98 */
99 _sa_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
100 {
102 size_type n = sa_buf.size();
103 this->width(bits::hi(n) + 1);
104 this->resize((n + sample_dens - 1) / sample_dens);
105
106 for (size_type i = 0, cnt_mod = sample_dens, cnt_sum = 0; i < n; ++i, ++cnt_mod)
107 {
108 size_type sa = sa_buf[i];
109 if (sample_dens == cnt_mod)
110 {
111 cnt_mod = 0;
112 base_type::operator[](cnt_sum++) = sa;
113 }
114 }
115 }
116
118 inline bool is_sampled(size_type i) const
119 {
120 return 0 == (i % sample_dens);
121 }
122
125 {
127 }
128};
129
130template <uint8_t t_width = 0>
132{
133 template <class t_csa>
136};
137
138template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
139class _text_order_sampling : public int_vector<t_width>
140{
141private:
142 t_bv m_marked;
143 t_rank m_rank_marked;
144
145public:
147 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
149 typedef t_bv bv_type;
150 enum
151 {
152 sample_dens = t_csa::sa_sample_dens
153 };
154 enum
155 {
156 text_order = true
157 };
159
160 bv_type const & marked = m_marked;
161 t_rank const & rank_marked = m_rank_marked;
162
166
168 /*
169 * \param cconfig Cache configuration (SA is expected to be cached.).
170 * \param csa Pointer to the corresponding CSA. Not used in this class.
171 * \par Time complexity
172 * Linear in the size of the suffix array.
173 */
174 _text_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
175 {
177 size_type n = sa_buf.size();
178 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
179 this->width(bits::hi(n / sample_dens) + 1);
180 this->resize((n + sample_dens - 1) / sample_dens);
181
182 for (size_type i = 0, sa_cnt = 0; i < n; ++i)
183 {
184 size_type sa = sa_buf[i];
185 if (0 == (sa % sample_dens))
186 {
187 marked[i] = 1;
188 base_type::operator[](sa_cnt++) = sa / sample_dens;
189 }
190 }
191 m_marked = std::move(t_bv(marked));
192 util::init_support(m_rank_marked, &m_marked);
193 }
194
197 {
198 m_marked = st.m_marked;
199 m_rank_marked = st.m_rank_marked;
200 m_rank_marked.set_vector(&m_marked);
201 }
202
204 inline bool is_sampled(size_type i) const
205 {
206 return m_marked[i];
207 }
208
211 {
212 return base_type::operator[](m_rank_marked(i)) * sample_dens;
213 }
214
216 {
217 return base_type::operator[](i);
218 }
219
222 {
223 if (this != &st)
224 {
226 m_marked = st.m_marked;
227 m_rank_marked = st.m_rank_marked;
228 m_rank_marked.set_vector(&m_marked);
229 }
230 return *this;
231 }
232
235 {
236 base_type::swap(st);
237 m_marked.swap(st.m_marked);
238 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
239 }
240
241 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
242 {
243 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
244 size_type written_bytes = 0;
245 written_bytes += base_type::serialize(out, child, "samples");
246 written_bytes += m_marked.serialize(out, child, "marked");
247 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
248 structure_tree::add_size(child, written_bytes);
249 return written_bytes;
250 }
251
252 void load(std::istream & in)
253 {
254 base_type::load(in);
255 m_marked.load(in);
256 m_rank_marked.load(in);
257 m_rank_marked.set_vector(&m_marked);
258 }
259
260 template <typename archive_t>
261 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
262 {
264 ar(CEREAL_NVP(m_marked));
265 ar(CEREAL_NVP(m_rank_marked));
266 }
267
268 template <typename archive_t>
269 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
270 {
272 ar(CEREAL_NVP(m_marked));
273 ar(CEREAL_NVP(m_rank_marked));
274 m_rank_marked.set_vector(&m_marked);
275 }
276};
277
278template <class t_bit_vec = sd_vector<>, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
285
286template <class t_csa,
287 class t_bv_sa = sd_vector<>,
288 class t_bv_isa = sd_vector<>,
289 class t_rank_sa = typename t_bv_sa::rank_1_type,
290 class t_select_isa = typename t_bv_isa::select_1_type>
292{
293private:
294 t_bv_sa m_marked_sa;
295 t_rank_sa m_rank_marked_sa;
296 t_bv_isa m_marked_isa;
297 t_select_isa m_select_marked_isa;
298 wt_int<rrr_vector<63>> m_inv_perm;
299
300public:
301 typedef typename bit_vector::size_type size_type; // make typedefs of base_type visible
303 typedef t_bv_sa bv_sa_type;
304 enum
305 {
306 sample_dens = t_csa::sa_sample_dens
307 };
308 enum
309 {
310 text_order = true
311 };
313
314 t_bv_sa const & marked_sa = m_marked_sa;
315 t_rank_sa const & rank_marked_sa = m_rank_marked_sa;
316 t_bv_isa const & marked_isa = m_marked_isa;
317 t_select_isa const & select_marked_isa = m_select_marked_isa;
318
322
324 /*
325 * \param cconfig Cache configuration (SA is expected to be cached.).
326 * \param csa Pointer to the corresponding CSA. Not used in this class.
327 * \par Time complexity
328 * Linear in the size of the suffix array.
329 */
330 _fuzzy_sa_sampling(cache_config & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
331 {
332 {
333 // (2) check, if the suffix array is cached
334 if (!cache_file_exists(conf::KEY_ISA, cconfig))
335 {
336 auto event = memory_monitor::event("ISA");
337 construct_isa(cconfig);
338 }
340 }
341 {
343 size_type n = isa_buf.size();
344 bit_vector marked_isa(n, 0); // temporary bitvector for marked ISA positions
345 bit_vector marked_sa(n, 0); // temporary bitvector for marked SA positions
346 int_vector<> inv_perm((n + sample_dens - 1) / sample_dens, 0, bits::hi(n) + 1);
347 size_type cnt = 0;
348 size_type runs = 1;
349
350 uint64_t min_prev_val = 0;
351 for (size_type i = 0; i < n; i += sample_dens)
352 {
353 size_type pos_min = i;
354 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
355 for (size_type j = i + 1; j < i + sample_dens and j < n; ++j)
356 {
357 if (isa_buf[j] < isa_buf[pos_min])
358 pos_min = j;
359 if (isa_buf[j] >= min_prev_val)
360 {
361 if (pos_cnd == n)
362 {
363 pos_cnd = j;
364 }
365 else if (isa_buf[j] < isa_buf[pos_cnd])
366 {
367 pos_cnd = j;
368 }
369 }
370 }
371 if (pos_cnd == n)
372 { // increasing sequence can not be extended
373 pos_cnd = pos_min;
374 ++runs;
375 }
376 min_prev_val = isa_buf[pos_cnd];
377 marked_isa[pos_cnd] = 1;
378 inv_perm[cnt++] = min_prev_val;
379 marked_sa[min_prev_val] = 1;
380 }
381 m_marked_isa = std::move(t_bv_isa(marked_isa));
382 util::init_support(m_select_marked_isa, &m_marked_isa);
383 {
385 for (size_type i = 0; i < inv_perm.size(); ++i)
386 {
387 inv_perm[i] = rank_marked_sa(inv_perm[i]);
388 }
389 }
390 util::bit_compress(inv_perm);
391
392 m_marked_sa = std::move(t_bv_sa(marked_sa));
393 util::init_support(m_rank_marked_sa, &m_marked_sa);
394
395 std::string tmp_key =
396 "fuzzy_isa_samples_" + util::to_string(util::pid()) + "_" + util::to_string(util::id());
397 std::string tmp_file_name = cache_file_name(tmp_key, cconfig);
398 store_to_file(inv_perm, tmp_file_name);
399 construct(m_inv_perm, tmp_file_name, 0);
400 sdsl::remove(tmp_file_name);
401 }
402 }
403
406 m_marked_sa(st.m_marked_sa),
407 m_rank_marked_sa(st.m_rank_marked_sa),
408 m_marked_isa(st.m_marked_isa),
409 m_select_marked_isa(st.m_select_marked_isa),
410 m_inv_perm(st.m_inv_perm)
411 {
412 m_rank_marked_sa.set_vector(&m_marked_sa);
413 m_select_marked_isa.set_vector(&m_marked_isa);
414 }
415
418 m_marked_sa(std::move(st.m_marked_sa)),
419 m_rank_marked_sa(std::move(st.m_rank_marked_sa)),
420 m_marked_isa(std::move(st.m_marked_isa)),
421 m_select_marked_isa(std::move(st.m_select_marked_isa)),
422 m_inv_perm(std::move(st.m_inv_perm))
423 {
424 m_rank_marked_sa.set_vector(&m_marked_sa);
425 m_select_marked_isa.set_vector(&m_marked_isa);
426 }
427
429 inline bool is_sampled(size_type i) const
430 {
431 return m_marked_sa[i];
432 }
433
436 {
437 return m_select_marked_isa(m_inv_perm.select(1, m_rank_marked_sa(i)) + 1);
438 }
439
441 inline value_type inv(size_type i) const
442 {
443 return m_inv_perm[i];
444 }
445
447 {
448 return m_inv_perm.size();
449 }
450
453 {
454 if (this != &st)
455 {
456 _fuzzy_sa_sampling tmp(st);
457 *this = std::move(tmp);
458 }
459 return *this;
460 }
461
464 {
465 m_marked_sa = std::move(st.m_marked_sa);
466 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
467 m_marked_isa = std::move(st.m_marked_isa);
468 m_select_marked_isa = std::move(st.m_select_marked_isa);
469 m_inv_perm = std::move(st.m_inv_perm);
470 m_rank_marked_sa.set_vector(&m_marked_sa);
471 m_select_marked_isa.set_vector(&m_marked_isa);
472 return *this;
473 }
474
475 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
476 {
477 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
478 size_type written_bytes = 0;
479 written_bytes += m_marked_sa.serialize(out, child, "marked_sa");
480 written_bytes += m_rank_marked_sa.serialize(out, child, "rank_marked_sa");
481 written_bytes += m_marked_isa.serialize(out, child, "marked_isa");
482 written_bytes += m_select_marked_isa.serialize(out, child, "select_marked_isa");
483 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
484 structure_tree::add_size(child, written_bytes);
485 return written_bytes;
486 }
487
488 void load(std::istream & in)
489 {
490 m_marked_sa.load(in);
491 m_rank_marked_sa.load(in);
492 m_rank_marked_sa.set_vector(&m_marked_sa);
493 m_marked_isa.load(in);
494 m_select_marked_isa.load(in);
495 m_select_marked_isa.set_vector(&m_marked_isa);
496 m_inv_perm.load(in);
497 }
498
499 template <typename archive_t>
500 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
501 {
502 ar(CEREAL_NVP(m_marked_sa));
503 ar(CEREAL_NVP(m_rank_marked_sa));
504 ar(CEREAL_NVP(m_marked_isa));
505 ar(CEREAL_NVP(m_select_marked_isa));
506 ar(CEREAL_NVP(m_inv_perm));
507 }
508
509 template <typename archive_t>
510 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
511 {
512 ar(CEREAL_NVP(m_marked_sa));
513 ar(CEREAL_NVP(m_rank_marked_sa));
514 m_rank_marked_sa.set_vector(&m_marked_sa);
515 ar(CEREAL_NVP(m_marked_isa));
516 ar(CEREAL_NVP(m_select_marked_isa));
517 m_select_marked_isa.set_vector(&m_marked_isa);
518 ar(CEREAL_NVP(m_inv_perm));
519 }
520
522 bool operator==(_fuzzy_sa_sampling const & other) const noexcept
523 {
524 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa)
525 && (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa)
526 && (m_inv_perm == other.m_inv_perm);
527 }
528
530 bool operator!=(_fuzzy_sa_sampling const & other) const noexcept
531 {
532 return !(*this == other);
533 }
534};
535template <class t_bv_sa = sd_vector<>,
536 class t_bv_isa = sd_vector<>,
537 class t_rank_sa = typename t_bv_sa::rank_1_type,
538 class t_select_isa = typename t_bv_isa::select_1_type>
545
546/*
547 * Text = ABCDEFABCDEF$
548 * 0123456789012
549 * sa_sample_dens = 4
550 * sa_sample_chars = {B,E}
551 * SA BWT (1)
552 * 12 F * $
553 * 06 F ABCDEF$
554 * 00 $ * ABCDEFABCDEF$
555 * 07 A BCDEF$
556 * 01 A BCDEFABCDEF$
557 * 08 B * CDEF$
558 * 02 B * CDEFABCDEF$
559 * 09 C DEF$
560 * 03 C DEFABCDEF$
561 * 10 D EF$
562 * 04 D * EFABCDEF$
563 * 11 E * F$
564 * 05 E * FABCDEF$
565 *
566 * In this sampling a suffix x=SA[i] is marked if x \‍( 0 \equiv x \mod sa_sample_dens \‍) or
567 * BWT[i] is contained in sa_sample_chars.
568 */
569
570template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
571class _bwt_sampling : public int_vector<t_width>
572{
573private:
574 t_bv m_marked;
575 t_rank m_rank_marked;
576
577public:
579 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
581 enum
582 {
583 sample_dens = t_csa::sa_sample_dens
584 };
585 enum
586 {
587 text_order = false
588 };
590
594
596 /*
597 * \param cconfig Cache configuration (BWT,SA, and SAMPLE_CHARS are expected to be cached.).
598 * \param csa Pointer to the corresponding CSA. Not used in this class.
599 * \par Time complexity
600 * Linear in the size of the suffix array.
601 */
602 _bwt_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
603 {
606 cache_file_name(key_bwt<t_csa::alphabet_type::int_width>(), cconfig));
607 size_type n = sa_buf.size();
608 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
609 this->width(bits::hi(n) + 1);
610 int_vector<> sample_char;
611 typedef typename t_csa::char_type char_type;
612 std::set<char_type> char_map;
613 if (load_from_cache(sample_char, conf::KEY_SAMPLE_CHAR, cconfig))
614 {
615 for (uint64_t i = 0; i < sample_char.size(); ++i)
616 {
617 char_map.insert((char_type)sample_char[i]);
618 }
619 }
620 size_type sa_cnt = 0;
621 for (size_type i = 0; i < n; ++i)
622 {
623 size_type sa = sa_buf[i];
624 char_type bwt = bwt_buf[i];
625 if (0 == (sa % sample_dens))
626 {
627 marked[i] = 1;
628 ++sa_cnt;
629 }
630 else if (char_map.find(bwt) != char_map.end())
631 {
632 marked[i] = 1;
633 ++sa_cnt;
634 }
635 }
636 this->resize(sa_cnt);
637 sa_cnt = 0;
638 for (size_type i = 0; i < n; ++i)
639 {
640 size_type sa = sa_buf[i];
641 if (marked[i])
642 {
643 base_type::operator[](sa_cnt++) = sa;
644 }
645 }
646 m_marked = std::move(marked);
647 util::init_support(m_rank_marked, &m_marked);
648 }
649
652 {
653 m_marked = st.m_marked;
654 m_rank_marked = st.m_rank_marked;
655 m_rank_marked.set_vector(&m_marked);
656 }
657
659 inline bool is_sampled(size_type i) const
660 {
661 return m_marked[i];
662 }
663
666 {
667 return base_type::operator[](m_rank_marked(i)) * sample_dens;
668 }
669
672 {
673 if (this != &st)
674 {
676 m_marked = st.m_marked;
677 m_rank_marked = st.m_rank_marked;
678 m_rank_marked.set_vector(&m_marked);
679 }
680 return *this;
681 }
682
685 {
686 base_type::swap(st);
687 m_marked.swap(st.m_marked);
688 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
689 }
690
691 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
692 {
693 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
694 size_type written_bytes = 0;
695 written_bytes += base_type::serialize(out, child, "samples");
696 written_bytes += m_marked.serialize(out, child, "marked");
697 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
698 structure_tree::add_size(child, written_bytes);
699 return written_bytes;
700 }
701
702 void load(std::istream & in)
703 {
704 base_type::load(in);
705 m_marked.load(in);
706 m_rank_marked.load(in);
707 m_rank_marked.set_vector(&m_marked);
708 }
709
710 template <typename archive_t>
711 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
712 {
714 ar(CEREAL_NVP(m_marked));
715 ar(CEREAL_NVP(m_rank_marked));
716 }
717
718 template <typename archive_t>
719 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
720 {
722 ar(CEREAL_NVP(m_marked));
723 ar(CEREAL_NVP(m_rank_marked));
724 m_rank_marked.set_vector(&m_marked);
725 }
726};
727
728template <class t_bit_vec = bit_vector, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
735
736template <class t_csa, uint8_t t_width = 0>
737class _isa_sampling : public int_vector<t_width>
738{
739public:
741 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
743 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
744 enum
745 {
746 sample_dens = t_csa::isa_sample_dens
747 };
749
753
755 /*
756 * \param cconfig Cache configuration (SA is expected to be cached.).
757 * \param sa_sample Pointer to the corresponding SA sampling. Not used in this class.
758 * \par Time complexity
759 * Linear in the size of the suffix array.
760 */
761 _isa_sampling(cache_config const & cconfig, SDSL_UNUSED sa_type const * sa_sample = nullptr)
762 {
764 size_type n = sa_buf.size();
765 if (n >= 1)
766 { // so n+t_csa::isa_sample_dens >= 2
767 this->width(bits::hi(n) + 1);
768 this->resize((n - 1) / sample_dens + 1);
769 }
770 for (size_type i = 0; i < this->size(); ++i)
771 base_type::operator[](i) = 0;
772
773 for (size_type i = 0; i < n; ++i)
774 {
775 size_type sa = sa_buf[i];
776 if ((sa % sample_dens) == 0)
777 {
779 }
780 }
781 }
782
785 {
787 }
788
790 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
791 {
792 size_type ci = i / sample_dens;
793 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
794 }
795
797 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
798 {
799 size_type ci = (i / sample_dens + 1) % this->size();
800 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
801 }
802
804 void load(std::istream & in, SDSL_UNUSED sa_type const * sa_sample = nullptr)
805 {
806 base_type::load(in);
807 }
808
809 template <typename archive_t>
810 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
811 {
813 }
814
815 template <typename archive_t>
816 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
817 {
819 }
820
822 {}
823};
824
825template <uint8_t t_width = 0>
827{
828 template <class t_csa>
831};
832
833template <class t_csa, class t_inv_perm, class t_sel>
835{
836 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
837 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
838
839public:
842 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
843 typedef typename sa_type::bv_type bv_type; // bitvector type used to mark SA samples
844 enum
845 {
846 sample_dens = t_csa::isa_sample_dens
847 };
849
850private:
851 t_sel m_select_marked;
852 t_inv_perm m_inv_perm;
853
854public:
855 t_sel const & select_marked = m_select_marked;
856
860
862 /*
863 * \param cconfig Cache configuration. (Not used in this class)
864 * \param sa_sample Pointer to the corresponding SA sampling..
865 * \par Time complexity
866 * Linear in the size of the suffix array.
867 */
869 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
870 {
871 // and initialize the select support on bitvector marked
872 m_select_marked = t_sel(&(sa_sample->marked));
873 int_vector<> const * perm = (int_vector<> const *)sa_sample;
874 m_inv_perm = t_inv_perm(perm);
875 m_inv_perm.set_vector(perm);
876 }
877
880 {
881 m_inv_perm = st.m_inv_perm;
882 m_select_marked = st.m_select_marked;
883 }
884
887 {
888 return m_select_marked(m_inv_perm[i / sample_dens] + 1);
889 }
890
892 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
893 {
894 size_type ci = i / sample_dens;
895 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
896 }
897
899 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
900 {
901 size_type ci = (i / sample_dens + 1) % m_inv_perm.size();
902 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
903 }
904
907 {
908 if (this != &st)
909 {
910 m_inv_perm = st.m_inv_perm;
911 m_select_marked = st.m_select_marked;
912 }
913 return *this;
914 }
915
918 {
919 if (this != &st)
920 {
921 m_inv_perm.swap(st.m_inv_perm);
922 m_select_marked.swap(st.m_select_marked);
923 }
924 }
925
926 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
927 {
928 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
929 size_type written_bytes = 0;
930 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
931 written_bytes += m_select_marked.serialize(out, child, "select_marked");
932 structure_tree::add_size(child, written_bytes);
933 return written_bytes;
934 }
935
937 void load(std::istream & in, sa_type const * sa_sample = nullptr)
938 {
939 m_inv_perm.load(in);
940 m_select_marked.load(in);
941 set_vector(sa_sample);
942 }
943
944 template <typename archive_t>
945 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
946 {
947 ar(CEREAL_NVP(m_inv_perm));
948 ar(CEREAL_NVP(m_select_marked));
949 }
950
951 template <typename archive_t>
952 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr)
953 {
954 ar(CEREAL_NVP(m_inv_perm));
955 ar(CEREAL_NVP(m_select_marked));
956 set_vector(sa_sample);
957 }
958
960 bool operator==(_text_order_isa_sampling_support const & other) const noexcept
961 {
962 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
963 }
964
966 bool operator!=(_text_order_isa_sampling_support const & other) const noexcept
967 {
968 return !(*this == other);
969 }
970
971 void set_vector(sa_type const * sa_sample = nullptr)
972 {
973 if (sa_sample == nullptr)
974 {
975 m_select_marked.set_vector(nullptr);
976 m_inv_perm.set_vector(nullptr);
977 }
978 else
979 {
980 m_select_marked.set_vector(&(sa_sample->marked));
981 m_inv_perm.set_vector((int_vector<> const *)sa_sample);
982 }
983 }
984};
985
986template <class t_inv_perm = inv_perm_support<8>, class t_sel = void>
988{
989 template <class t_csa>
991 t_csa,
992 t_inv_perm,
993 typename std::conditional<std::is_void<t_sel>::value,
994 typename t_csa::sa_sample_type::bv_type::select_1_type,
995 t_sel>::type>;
997};
998
999template <class t_csa, class t_select_sa>
1001{
1002 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
1003 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
1004
1005public:
1008 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
1009 enum
1010 {
1011 sample_dens = t_csa::isa_sample_dens
1014
1015private:
1016 sa_type const * m_sa_p = nullptr; // pointer to sa_sample_strategy
1017 t_select_sa m_select_marked_sa;
1018
1019public:
1023
1025 /*
1026 * \param cconfig Cache configuration. (Not used in this class)
1027 * \param sa_sample Pointer to the corresponding SA sampling..
1028 * \par Time complexity
1029 * Linear in the size of the suffix array.
1030 */
1031 _fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const & cconfig, sa_type const * sa_sample) : m_sa_p(sa_sample)
1032 {
1033 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
1034 }
1035
1037 _fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const & st) : m_select_marked_sa(st.m_select_marked_sa)
1038 {
1039 set_vector(st.m_sa_p);
1040 }
1041
1044 {
1045 return m_sa_p->inv(i);
1046 }
1047
1049 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
1050 {
1051 size_type ci = i / sample_dens;
1052 size_type j = m_sa_p->select_marked_isa(ci + 1);
1053 if (j > i)
1054 {
1055 if (ci > 0)
1056 {
1057 ci = ci - 1;
1058 }
1059 else
1060 {
1061 ci = m_sa_p->size() - 1;
1062 }
1063 j = m_sa_p->select_marked_isa(ci + 1);
1064 }
1065 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1066 }
1067
1069 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
1070 {
1071 size_type ci = i / sample_dens;
1072 size_type j = m_sa_p->select_marked_isa(ci + 1);
1073 if (j < i)
1074 {
1075 if (ci < m_sa_p->size() - 1)
1076 {
1077 ci = ci + 1;
1078 }
1079 else
1080 {
1081 ci = 0;
1082 }
1083 j = m_sa_p->select_marked_isa(ci + 1);
1084 }
1085 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1086 }
1087
1090 {
1091 if (this != &st)
1092 {
1093 m_select_marked_sa = st.m_select_marked_sa;
1094 set_vector(st.m_sa_p);
1095 }
1096 return *this;
1097 }
1098
1101 {
1102 m_select_marked_sa.swap(st.m_select_marked_sa);
1103 }
1104
1105 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
1106 {
1107 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
1108 size_type written_bytes = 0;
1109 written_bytes += m_select_marked_sa.serialize(out, child, "select_marked_sa");
1110 structure_tree::add_size(child, written_bytes);
1111 return written_bytes;
1112 }
1113
1115 void load(std::istream & in, sa_type const * sa_sample = nullptr)
1116 {
1117 m_select_marked_sa.load(in);
1118 set_vector(sa_sample);
1119 }
1120
1121 template <typename archive_t>
1122 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
1123 {
1124 ar(CEREAL_NVP(m_select_marked_sa));
1125 }
1126
1127 template <typename archive_t>
1128 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr)
1129 {
1130 ar(CEREAL_NVP(m_select_marked_sa));
1131 set_vector(sa_sample);
1132 }
1133
1135 bool operator==(_fuzzy_isa_sampling_support const & other) const noexcept
1136 {
1137 return (m_select_marked_sa == other.m_select_marked_sa);
1138 }
1139
1141 bool operator!=(_fuzzy_isa_sampling_support const & other) const noexcept
1142 {
1143 return !(*this == other);
1144 }
1145
1146 void set_vector(sa_type const * sa_sample = nullptr)
1147 {
1148 m_sa_p = sa_sample;
1149 if (nullptr != m_sa_p)
1150 {
1151 m_select_marked_sa.set_vector(&(sa_sample->marked_sa));
1152 }
1153 }
1154};
1155
1156template <class t_select_sa = void>
1158{
1159 template <class t_csa>
1160 using type =
1162 typename std::conditional<std::is_void<t_select_sa>::value,
1163 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
1164 t_select_sa>::type>;
1166};
1167
1168} // namespace sdsl
1169
1170#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
_bwt_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_bwt_sampling()
Default constructor.
base_type::value_type value_type
_bwt_sampling & operator=(_bwt_sampling const &st)
Assignment operation.
void load(std::istream &in)
_bwt_sampling(_bwt_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
int_vector< t_width > base_type
base_type::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support & operator=(_fuzzy_isa_sampling_support const &st)
Assignment operation.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
void set_vector(sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const &st)
Copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
_fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, sa_type const *sa_sample)
Constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bit_vector::value_type value_type
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling const &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_fuzzy_sa_sampling(_fuzzy_sa_sampling const &st)
Copy constructor.
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
t_select_isa const & select_marked_isa
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
void load(std::istream &in, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Load sampling from disk.
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
isa_sampling_tag sampling_category
_isa_sampling(cache_config const &cconfig, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Constructor.
base_type::value_type value_type
int_vector< t_width > base_type
void set_vector(SDSL_UNUSED sa_type const *)
base_type::size_type size_type
t_csa::sa_sample_type sa_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_sa_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_sa_order_sampling()
Default constructor.
base_type::value_type value_type
void set_vector(sa_type const *sa_sample=nullptr)
_text_order_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
_text_order_isa_sampling_support & operator=(_text_order_isa_sampling_support const &st)
Assignment operation.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support(_text_order_isa_sampling_support const &st)
Copy constructor.
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
void swap(_text_order_isa_sampling_support &st)
Swap operation.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
value_type condensed_sa(size_type i) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_text_order_sampling(_text_order_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_text_order_sampling & operator=(_text_order_sampling const &st)
Assignment operation.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal if archive is not binary.
void swap(int_vector &v) noexcept
Swap method for int_vector.
int_vector_size_type size_type
reference operator[](size_type const &i) noexcept
non const version of [] operator
void load(std::istream &in)
Load the int_vector for a stream.
int_vector & operator=(int_vector const &v)
Assignment operator.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type size() const noexcept
The number of elements in the int_vector.
iterator insert(const_iterator it, value_type value)
Insert an element before the element that the iterator is pointing to.
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal if archive is not binary.
int_vector_trait< t_width >::value_type value_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition wt_int.hpp:62
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition wt_int.hpp:792
size_type size() const
Returns the size of the original vector.
Definition wt_int.hpp:323
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition wt_int.hpp:456
void load(std::istream &in)
Loads the data structure from the given istream.
Definition wt_int.hpp:808
#define SDSL_UNUSED
Definition config.hpp:12
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_SAMPLE_CHAR[]
Definition config.hpp:44
constexpr char KEY_SA[]
Definition config.hpp:36
constexpr char KEY_ISA[]
Definition config.hpp:39
uint64_t id()
uint64_t pid()
std::string to_string(T const &t, int w=1)
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
Definition util.hpp:502
Namespace for the succinct data structure library.
bool cache_file_exists(std::string const &key, cache_config const &config)
Checks if the resource specified by the key exists in the cache.
Definition io.hpp:733
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition io.hpp:717
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
Definition io.hpp:772
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
Definition construct.hpp:56
void construct_isa(cache_config &config)
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
Definition io.hpp:874
int_vector ::size_type size(range_type const &r)
Size of a range.
ram_fs.hpp
rank_support_v.hpp contains rank_support_v.
rrr_vector.hpp contains the sdsl::rrr_vector class, and classes which support rank and select for rrr...
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:652
Helper class for construction process.
Definition config.hpp:66
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_int.hpp contains a specialized class for a wavelet tree of a sequence of the numbers.