SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
construct_sa.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_CONSTRUCT_SA
10#define INCLUDED_SDSL_CONSTRUCT_SA
11
12#include <sdsl/config.hpp>
15#include <sdsl/divsufsort.hpp>
16#include <sdsl/int_vector.hpp>
17#include <sdsl/qsufsort.hpp>
18
19namespace sdsl
20{
21
23
44inline void construct_sa_se(cache_config & config)
45{
46 int_vector<8> text;
48
49 if (text.size() <= 2)
50 {
51 // If text is c$ or $ write suffix array [1, 0] or [0]
52 int_vector_buffer<> sa(cache_file_name(conf::KEY_SA, config), std::ios::out, 8, 2);
53 if (text.size() == 2) { sa.push_back(1); }
54 sa.push_back(0);
55 }
56 else
57 {
58 _construct_sa_se<int_vector<8>>(text, cache_file_name(conf::KEY_SA, config), 256, 0);
59 }
61}
62
63namespace algorithm
64{
65
66//
67// Forward declarations
68//----------------------------------------------------------
69
71
78template <typename t_int_vec>
79void calculate_sa(const unsigned char * c, typename t_int_vec::size_type len, t_int_vec & sa)
80{
81 typedef typename t_int_vec::size_type size_type;
82 constexpr uint8_t t_width = t_int_vec::fixed_int_width;
83 if (len <= 1)
84 { // handle special case
85 sa.width(1);
86 sa.resize(len);
87 if (len > 0) sa[0] = 0;
88 return;
89 }
90 bool small_file = (sizeof(len) <= 4 or len < 0x7FFFFFFFULL);
91 if (small_file)
92 {
93 uint8_t sa_width = sa.width();
94 if (32 == t_width or (0 == t_width and 32 >= sa_width))
95 {
96 sa.width(32);
97 sa.resize(len);
98 divsufsort(c, (int32_t *)sa.data(), (int32_t)len);
99 // copy integers back to the right positions
100 if (sa_width != 32)
101 {
102 for (size_type i = 0, p = 0; i < len; ++i, p += sa_width)
103 {
104 sa.set_int(p, sa.get_int(i << 5, 32), sa_width);
105 }
106 sa.width(sa_width);
107 sa.resize(len);
108 }
109 }
110 else
111 {
112 if (sa.width() < bits::hi(len) + 1)
113 {
114 throw std::logic_error("width of int_vector is to small for the text!!!");
115 }
116 int_vector<> sufarray(len, 0, 32);
117 divsufsort(c, (int32_t *)sufarray.data(), (int32_t)len);
118 sa.resize(len);
119 for (size_type i = 0; i < len; ++i) { sa[i] = sufarray[i]; }
120 }
121 }
122 else
123 {
124 uint8_t sa_width = sa.width();
125 sa.width(64);
126 sa.resize(len);
127 divsufsort64(c, (int64_t *)sa.data(), len);
128 // copy integers back to the right positions
129 if (sa_width != 64)
130 {
131 for (size_type i = 0, p = 0; i < len; ++i, p += sa_width)
132 {
133 sa.set_int(p, sa.get_int(i << 6, 64), sa_width);
134 }
135 sa.width(sa_width);
136 sa.resize(len);
137 }
138 }
139}
140
141} // end namespace algorithm
142
144
159template <uint8_t t_width>
161{
162 static_assert(t_width == 0 or t_width == 8,
163 "construct_sa: width must be `0` for integer alphabet and `8` for byte alphabet");
164 const char * KEY_TEXT = key_text_trait<t_width>::KEY_TEXT;
165 if (t_width == 8)
166 {
168 {
169 read_only_mapper<t_width> text(KEY_TEXT, config);
170 auto sa = write_out_mapper<0>::create(cache_file_name(conf::KEY_SA, config), 0, bits::hi(text.size()) + 1);
171 // call divsufsort
172 algorithm::calculate_sa((const unsigned char *)text.data(), text.size(), sa);
173 }
174 else if (construct_config().byte_algo_sa == SE_SAIS)
175 {
176 construct_sa_se(config);
177 }
178 }
179 else if (t_width == 0)
180 {
181 // call qsufsort
182 int_vector<> sa;
183 sdsl::qsufsort::construct_sa(sa, cache_file_name(KEY_TEXT, config).c_str(), 0);
184 store_to_cache(sa, conf::KEY_SA, config);
185 }
186 else
187 {
188 std::cerr << "Unknown alphabet type" << std::endl;
189 }
190}
191
192} // end namespace sdsl
193
194#endif
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
const uint64_t * data() const
A generic vector class for integers of width .
Definition: int_vector.hpp:216
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
size_type size() const noexcept
The number of elements in the int_vector.
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
static int_vector_mapper< t_width > create(const std::string &key, cache_config &config)
int_vector.hpp contains the sdsl::int_vector class.
void calculate_sa(const unsigned char *c, typename t_int_vec::size_type len, t_int_vec &sa)
Calculates the Suffix Array for a text.
constexpr char KEY_SA[]
Definition: config.hpp:37
constexpr char KEY_TEXT[]
Definition: config.hpp:41
void construct_sa(int_vector_type &sa, const char *file, uint8_t num_bytes)
Construct a suffix array for the sequence stored in a file.
Definition: qsufsort.hpp:60
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
@ LIBDIVSUFSORT
Definition: config.hpp:61
@ SE_SAIS
Definition: config.hpp:62
int32_t divsufsort(const uint8_t *T, saidx_t *SA, saidx_t n)
void construct_sa_se(cache_config &config)
Constructs the Suffix Array (SA) from text over byte-alphabet.
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition: io.hpp:656
void construct_sa(cache_config &config)
Constructs the Suffix Array (SA) from text over byte- or integer-alphabet.
bool store_to_cache(const T &v, const std::string &key, cache_config &config, bool add_type_hash=false)
Stores the object v as a resource in the cache.
Definition: io.hpp:737
construct_config_data & construct_config()
int32_t divsufsort64(const uint8_t *T, int64_t *SA, int64_t n)
qsufsort.hpp contains the interface for the suffix array construction algorithm of Larsson.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Helper class for construction process.
Definition: config.hpp:67
Helper classes to transform width=0 and width=8 to corresponding text key.
Definition: config.hpp:91