SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
dac_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_DAC_VECTOR
9#define SDSL_DAC_VECTOR
10
11#include <sdsl/int_vector.hpp>
12#include <sdsl/iterators.hpp>
14
16namespace sdsl
17{
18
20
44template <uint8_t t_b = 4, typename t_rank = rank_support_v5<>>
46{
47 private:
48 static_assert(t_b > 0, "dac_vector: t_b has to be larger than 0");
49 static_assert(t_b < 64, "dac_vector: t_b has to be smaller than 64");
50
51 public:
58 typedef const pointer const_pointer;
60 typedef ptrdiff_t difference_type;
61 typedef t_rank rank_support_type;
63
64 private:
65 int_vector<t_b> m_data; // block data for every level
66 bit_vector m_overflow; // mark non-end bytes
67 rank_support_type m_overflow_rank; // rank for m_overflow
68 int_vector<64> m_level_pointer_and_rank = int_vector<64>(4, 0);
69 uint8_t m_max_level; // maximum level < (log n)/b+1
70
71 public:
72 dac_vector() = default;
73
75 : m_data(v.m_data)
76 , m_overflow(v.m_overflow)
77 , m_overflow_rank(v.m_overflow_rank)
78 , m_level_pointer_and_rank(v.m_level_pointer_and_rank)
79 , m_max_level(v.m_max_level)
80 {
81 m_overflow_rank.set_vector(&m_overflow);
82 }
83
84 dac_vector(dac_vector && v) { *this = std::move(v); }
86 {
87 if (this != &v)
88 {
89 dac_vector tmp(v);
90 *this = std::move(tmp);
91 }
92 return *this;
93 }
94
96 {
97 if (this != &v)
98 {
99 m_data = std::move(v.m_data);
100 m_overflow = std::move(v.m_overflow);
101 m_overflow_rank = std::move(v.m_overflow_rank);
102 m_overflow_rank.set_vector(&m_overflow);
103 m_level_pointer_and_rank = std::move(v.m_level_pointer_and_rank);
104 m_max_level = std::move(v.m_max_level);
105 }
106 return *this;
107 }
108
110
113 template <class Container>
114 dac_vector(const Container & c);
115
117 template <uint8_t int_width>
119
121 size_type size() const { return m_level_pointer_and_rank[2]; }
123 static size_type max_size() { return int_vector<>::max_size() / 2; }
124
126 bool empty() const { return 0 == m_level_pointer_and_rank[2]; }
127
129 const const_iterator begin() const { return const_iterator(this, 0); }
130
132 const const_iterator end() const { return const_iterator(this, size()); }
133
136 {
137 uint8_t level = 1;
138 uint8_t offset = t_b;
139 size_type result = m_data[i];
140 const uint64_t * p = m_level_pointer_and_rank.data();
141 uint64_t ppi = (*p) + i;
142 while (level < m_max_level and m_overflow[ppi])
143 {
144 p += 2;
145 ppi = *p + (m_overflow_rank(ppi) - *(p - 1));
146 result |= (m_data[ppi] << (offset));
147 ++level;
148 offset += t_b;
149 }
150 return result;
151 }
152
154 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
155
157 void load(std::istream & in)
158 {
159 m_data.load(in);
160 m_overflow.load(in);
161 m_overflow_rank.load(in, &m_overflow);
162 m_level_pointer_and_rank.load(in);
163 read_member(m_max_level, in);
164 }
165
167 template <typename archive_t>
168 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
169
170 template <typename archive_t>
171 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
172
173 bool operator==(const dac_vector & v) const
174 {
175 return (m_max_level == v.m_max_level) && (m_data == v.m_data) && (m_overflow == v.m_overflow) &&
176 (m_overflow_rank == v.m_overflow_rank) && (m_level_pointer_and_rank == v.m_level_pointer_and_rank);
177 }
178
179 bool operator!=(const dac_vector & v) const { return !(*this == v); }
180};
181
182template <uint8_t t_b, typename t_rank>
183template <class Container>
185{
186 // (1) Count for each level, how many blocks are needed for the representation
187 // Running time: \f$ O(n \times \frac{\log n}{b} \f$
188 // Result is sorted in m_level_pointer_and_rank
189 size_type n = c.size(), val = 0;
190 if (n == 0) return;
191 // initialize counter
192 m_level_pointer_and_rank = int_vector<64>(128, 0);
193 m_level_pointer_and_rank[0] = n; // level 0 has n entries
194
195 uint8_t level_x_2 = 0;
196 uint8_t max_level_x_2 = 4;
197 for (size_type i = 0; i < n; ++i)
198 {
199 val = c[i];
200 val >>= t_b; // shift value b bits to the right
201 level_x_2 = 2;
202 while (val)
203 {
204 // increase counter for current level by 1
205 ++m_level_pointer_and_rank[level_x_2];
206 val >>= t_b; // shift value b bits to the right
207 level_x_2 += 2; // increase level by 1
208 max_level_x_2 = std::max(max_level_x_2, level_x_2);
209 }
210 }
211 m_level_pointer_and_rank.resize(max_level_x_2);
212 // (2) Determine maximum level and prefix sums of level counters
213 m_max_level = 0;
214 size_type sum_blocks = 0, last_block_size = 0;
215 for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
216 {
217 t = sum_blocks;
218 sum_blocks += m_level_pointer_and_rank[i];
219 m_level_pointer_and_rank[i] = t;
220 if (sum_blocks > t)
221 {
222 ++m_max_level;
223 last_block_size = sum_blocks - t;
224 }
225 }
226 m_overflow = bit_vector(sum_blocks - last_block_size, 0);
227 m_data.resize(sum_blocks);
228
229 assert(last_block_size > 0);
230
231 // (3) Enter block and overflow data
232 int_vector<64> cnt = m_level_pointer_and_rank;
233 const uint64_t mask = bits::lo_set[t_b];
234
235 for (size_type i = 0, j = 0; i < n; ++i)
236 {
237 val = c[i];
238 j = cnt[0]++;
239 m_data[j] = val & mask;
240 val >>= t_b; // shift value b bits to the right
241 level_x_2 = 2;
242 while (val)
243 {
244 m_overflow[j] = 1;
245 // increase counter for current level by 1
246 j = cnt[level_x_2]++;
247 m_data[j] = val & mask;
248 val >>= t_b; // shift value b bits to the right
249 level_x_2 += 2; // increase level by 1
250 }
251 }
252
253 // (4) Initialize rank data structure for m_overflow and precalc rank for
254 // pointers
255 util::init_support(m_overflow_rank, &m_overflow);
256 for (size_type i = 0;
257 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
258 ++i)
259 {
260 m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
261 }
262}
263
264template <uint8_t t_b, typename t_rank>
265template <uint8_t int_width>
267{
268 // (1) Count for each level, how many blocks are needed for the representation
269 // Running time: \f$ O(n \times \frac{\log n}{b} \f$
270 // Result is sorted in m_level_pointer_and_rank
271 size_type n = v_buf.size(), val = 0;
272 if (n == 0) return;
273 // initialize counter
274 m_level_pointer_and_rank = int_vector<64>(128, 0);
275 m_level_pointer_and_rank[0] = n; // level 0 has n entries
276
277 uint8_t level_x_2 = 0;
278 uint8_t max_level_x_2 = 4;
279 for (size_type i = 0; i < n; ++i)
280 {
281 val = v_buf[i];
282 val >>= t_b; // shift value b bits to the right
283 level_x_2 = 2;
284 while (val)
285 {
286 // increase counter for current level by 1
287 ++m_level_pointer_and_rank[level_x_2];
288 val >>= t_b; // shift value b bits to the right
289 level_x_2 += 2; // increase level by 1
290 max_level_x_2 = std::max(max_level_x_2, level_x_2);
291 }
292 }
293 m_level_pointer_and_rank.resize(max_level_x_2);
294 // (2) Determine maximum level and prefix sums of level counters
295 m_max_level = 0;
296 size_type sum_blocks = 0, last_block_size = 0;
297 for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2)
298 {
299 t = sum_blocks;
300 sum_blocks += m_level_pointer_and_rank[i];
301 m_level_pointer_and_rank[i] = t;
302 if (sum_blocks > t)
303 {
304 ++m_max_level;
305 last_block_size = sum_blocks - t;
306 }
307 }
308 m_overflow = bit_vector(sum_blocks - last_block_size, 0);
309 m_data.resize(sum_blocks);
310
311 assert(last_block_size > 0);
312
313 // (3) Enter block and overflow data
314 int_vector<64> cnt = m_level_pointer_and_rank;
315 const uint64_t mask = bits::lo_set[t_b];
316
317 for (size_type i = 0, j = 0; i < n; ++i)
318 {
319 val = v_buf[i];
320 j = cnt[0]++;
321 m_data[j] = val & mask;
322 val >>= t_b; // shift value b bits to the right
323 level_x_2 = 2;
324 while (val)
325 {
326 m_overflow[j] = 1;
327 // increase counter for current level by 1
328 j = cnt[level_x_2]++;
329 m_data[j] = val & mask;
330 val >>= t_b; // shift value b bits to the right
331 level_x_2 += 2; // increase level by 1
332 }
333 }
334
335 // (4) Initialize rank data structure for m_overflow and precalc rank for
336 // pointers
337 util::init_support(m_overflow_rank, &m_overflow);
338 for (size_type i = 0;
339 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size();
340 ++i)
341 {
342 m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]);
343 }
344}
345
346template <uint8_t t_b, typename t_rank>
349 std::string name) const
350{
351 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
352 size_type written_bytes = 0;
353 written_bytes += m_data.serialize(out, child, "data");
354 written_bytes += m_overflow.serialize(out, child, "overflow");
355 written_bytes += m_overflow_rank.serialize(out, child, "overflow_rank");
356 written_bytes += m_level_pointer_and_rank.serialize(out, child, "level_pointer_and_rank");
357 written_bytes += write_member(m_max_level, out, child, "max_level");
358 structure_tree::add_size(child, written_bytes);
359 return written_bytes;
360}
361
362template <uint8_t t_b, typename t_rank>
363template <typename archive_t>
365{
366 ar(CEREAL_NVP(m_max_level));
367 ar(CEREAL_NVP(m_data));
368 ar(CEREAL_NVP(m_overflow));
369 ar(CEREAL_NVP(m_overflow_rank));
370 ar(CEREAL_NVP(m_level_pointer_and_rank));
371}
372
373template <uint8_t t_b, typename t_rank>
374template <typename archive_t>
376{
377 ar(CEREAL_NVP(m_max_level));
378 ar(CEREAL_NVP(m_data));
379 ar(CEREAL_NVP(m_overflow));
380 ar(CEREAL_NVP(m_overflow_rank));
381 m_overflow_rank.set_vector(&m_overflow);
382 ar(CEREAL_NVP(m_level_pointer_and_rank));
383}
384
385} // end namespace sdsl
386#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
#define CEREAL_LOAD_FUNCTION_NAME
Definition: cereal.hpp:34
#define CEREAL_SAVE_FUNCTION_NAME
Definition: cereal.hpp:35
A generic immutable space-saving vector class for unsigned integers.
Definition: dac_vector.hpp:46
dac_vector & operator=(const dac_vector &v)
Definition: dac_vector.hpp:85
bool operator==(const dac_vector &v) const
Definition: dac_vector.hpp:173
bool empty() const
Returns if the dac_vector is empty.
Definition: dac_vector.hpp:126
const value_type const_reference
Definition: dac_vector.hpp:55
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: dac_vector.hpp:364
dac_vector(dac_vector &&v)
Definition: dac_vector.hpp:84
dac_vector()=default
dac_vector & operator=(dac_vector &&v)
Definition: dac_vector.hpp:95
const_iterator iterator
Definition: dac_vector.hpp:54
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the dac_vector to a stream.
Definition: dac_vector.hpp:347
int_vector ::size_type size_type
Definition: dac_vector.hpp:59
t_rank rank_support_type
Definition: dac_vector.hpp:61
const const_iterator end() const
Iterator that points to the position after the last element of the dac_vector.
Definition: dac_vector.hpp:132
static size_type max_size()
Return the largest size that this container can ever have.
Definition: dac_vector.hpp:123
random_access_const_iterator< dac_vector > const_iterator
Definition: dac_vector.hpp:53
const_reference reference
Definition: dac_vector.hpp:56
size_type size() const
The number of elements in the dac_vector.
Definition: dac_vector.hpp:121
bool operator!=(const dac_vector &v) const
Definition: dac_vector.hpp:179
void load(std::istream &in)
Load from a stream.
Definition: dac_vector.hpp:157
int_vector ::value_type value_type
Definition: dac_vector.hpp:52
value_type operator[](size_type i) const
[]-operator
Definition: dac_vector.hpp:135
iv_tag index_category
Definition: dac_vector.hpp:62
const pointer const_pointer
Definition: dac_vector.hpp:58
const const_iterator begin() const
Iterator that points to the first element of the dac_vector.
Definition: dac_vector.hpp:129
ptrdiff_t difference_type
Definition: dac_vector.hpp:60
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: dac_vector.hpp:375
const_reference * pointer
Definition: dac_vector.hpp:57
dac_vector(const dac_vector &v)
Definition: dac_vector.hpp:74
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
void load(std::istream &in)
Load the int_vector for a stream.
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:542
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
rank_support_v5.hpp contains rank_support_v5.5
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187