SDSL 3.0.2
Succinct Data Structure Library
Loading...
Searching...
No Matches
nearest_neighbour_dictionary.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
9#define INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
10
11#include <assert.h>
12#include <iosfwd>
13#include <stdint.h>
14#include <string>
15
16#include <sdsl/bits.hpp>
17#include <sdsl/cereal.hpp>
18#include <sdsl/int_vector.hpp>
19#include <sdsl/io.hpp>
22#include <sdsl/util.hpp>
23
25namespace sdsl
26{
27
30
41// TODO: implement an iterator for the ones in the nearest neighbour dictionary!!! used in the construction of the
42// balanced parentheses support
43template <uint8_t t_sample_dens>
45{
46private:
47 static_assert(t_sample_dens != 0, "nearest_neighbour_dictionary: t_sample_dens should not be equal 0!");
48
49public:
51
52private:
53 int_vector<> m_abs_samples; // absolute samples array corresponds to array \f$ A_1 \f$ in the paper
54 int_vector<> m_differences; // vector for the differences in between the samples; corresponds to array \f$ A_2 \f$
55 // in the paper
56 size_type m_ones; // corresponds to N in the paper
57 size_type m_size; // corresponds to M in the paper
58 bit_vector m_contains_abs_sample; // vector which stores for every block of length t_sample_dens of the original
59 // bit_vector if an absolute sample lies in this block.
60 // Corresponds to array \f$ A_3 \f$ in the paper.
61 rank_support_v<> m_rank_contains_abs_sample; // rank support for m_contains_abs_sample. Corresponds to array \f$ A_4
62 // \f$ in the paper.
63 // NOTE: A faster version should store the absolute samples and the differences interleaved
64
65public:
67 nearest_neighbour_dictionary() : m_ones(0), m_size(0)
68 {}
69
71
73 nearest_neighbour_dictionary(bit_vector const & v) : m_ones(0), m_size(0)
74 {
75 size_type max_distance_between_two_ones = 0;
76 size_type ones = 0; // counter for the ones in v
77
78 // get maximal distance between to ones in the bit vector
79 // speed this up by broadword computing
80 for (size_type i = 0, last_one_pos_plus_1 = 0; i < v.size(); ++i)
81 {
82 if (v[i])
83 {
84 if (i + 1 - last_one_pos_plus_1 > max_distance_between_two_ones)
85 max_distance_between_two_ones = i + 1 - last_one_pos_plus_1;
86 last_one_pos_plus_1 = i + 1;
87 ++ones;
88 }
89 }
90 m_ones = ones;
91 m_size = v.size();
92 // std::cerr<<ones<<std::endl;
93 // initialize absolute samples m_abs_samples[0]=0
94 m_abs_samples = int_vector<>(m_ones / t_sample_dens + 1, 0, bits::hi(v.size()) + 1);
95 // initialize different values
96 m_differences = int_vector<>(m_ones - m_ones / t_sample_dens, 0, bits::hi(max_distance_between_two_ones) + 1);
97 // initialize m_contains_abs_sample
98 m_contains_abs_sample = bit_vector((v.size() + t_sample_dens - 1) / t_sample_dens, 0);
99 ones = 0;
100 for (size_type i = 0, last_one_pos = 0; i < v.size(); ++i)
101 {
102 if (v[i])
103 {
104 ++ones;
105 if ((ones % t_sample_dens) == 0)
106 { // insert absolute samples
107 m_abs_samples[ones / t_sample_dens] = i;
108 m_contains_abs_sample[i / t_sample_dens] = 1;
109 }
110 else
111 {
112 m_differences[ones - ones / t_sample_dens - 1] = i - last_one_pos;
113 }
114 last_one_pos = i;
115 }
116 }
117 util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample);
118 }
119
122 m_abs_samples(nnd.m_abs_samples),
123 m_differences(nnd.m_differences),
124 m_ones(nnd.m_ones),
125 m_size(nnd.m_size),
126 m_contains_abs_sample(nnd.m_contains_abs_sample),
127 m_rank_contains_abs_sample(nnd.m_rank_contains_abs_sample)
128 {
129 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
130 }
131
134 {
135 *this = std::move(nnd);
136 }
137
141
143 {
144 if (*this != &nnd)
145 {
147 *this = std::move(tmp);
148 }
149 return *this;
150 }
151
153 {
154 if (this != &nnd)
155 {
156 m_abs_samples = std::move(nnd.m_abs_samples);
157 m_differences = std::move(nnd.m_differences);
158 m_ones = std::move(nnd.m_ones);
159 m_size = std::move(nnd.m_size);
160 m_contains_abs_sample = std::move(nnd.m_contains_abs_sample);
161 m_rank_contains_abs_sample = std::move(nnd.m_rank_contains_abs_sample);
162 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
163 }
164 return *this;
165 }
166
168
173 {
174 assert(idx <= m_size);
175 size_type r = m_rank_contains_abs_sample.rank(idx / t_sample_dens); //
176 size_type result = r * t_sample_dens;
177 size_type i = m_abs_samples[r];
178 while (++result <= m_ones)
179 {
180 if ((result % t_sample_dens) == 0)
181 {
182 i = m_abs_samples[result / t_sample_dens];
183 }
184 else
185 {
186 i = i + m_differences[result - result / t_sample_dens - 1];
187 }
188 if (i >= idx)
189 return result - 1;
190 }
191 return result - 1;
192 };
193
195
200 {
201 assert(i > 0 and i <= m_ones);
202 size_type j = i / t_sample_dens;
203 size_type result = m_abs_samples[j];
204 j = j * t_sample_dens - j;
205 for (size_type end = j + (i % t_sample_dens); j < end; ++j)
206 {
207 result += m_differences[j];
208 }
209 return result;
210 }
211
213
219 {
220 size_type r = rank(i + 1);
221 assert(r > 0);
222 return select(r);
223 }
231 {
232 size_type r = rank(i);
233 assert(r < m_ones);
234 return select(r + 1);
235 }
236
238 {
239 return m_size;
240 }
241
243 {
244 return m_ones;
245 }
246
248
250 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
251 {
252 size_type written_bytes = 0;
253 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
254 written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
255 written_bytes += m_differences.serialize(out, child, "differences");
256 written_bytes += write_member(m_ones, out, child, "ones");
257 written_bytes += write_member(m_size, out, child, "size");
258 written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
259 written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
260 structure_tree::add_size(child, written_bytes);
261 return written_bytes;
262 }
263
265
267 void load(std::istream & in)
268 {
269 m_abs_samples.load(in);
270 m_differences.load(in);
271 read_member(m_ones, in);
272 read_member(m_size, in);
273 m_contains_abs_sample.load(in);
274 m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
275 }
276
277 template <typename archive_t>
278 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
279 {
280 ar(CEREAL_NVP(m_ones));
281 ar(CEREAL_NVP(m_size));
282 ar(CEREAL_NVP(m_abs_samples));
283 ar(CEREAL_NVP(m_differences));
284 ar(CEREAL_NVP(m_contains_abs_sample));
285 ar(CEREAL_NVP(m_rank_contains_abs_sample));
286 }
287
288 template <typename archive_t>
289 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
290 {
291 ar(CEREAL_NVP(m_ones));
292 ar(CEREAL_NVP(m_size));
293 ar(CEREAL_NVP(m_abs_samples));
294 ar(CEREAL_NVP(m_differences));
295 ar(CEREAL_NVP(m_contains_abs_sample));
296 ar(CEREAL_NVP(m_rank_contains_abs_sample));
297 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
298 }
299
301 bool operator==(nearest_neighbour_dictionary const & other) const noexcept
302 {
303 return (m_ones == other.m_ones) && (m_size == other.m_size) && (m_abs_samples == other.m_abs_samples)
304 && (m_differences == other.m_differences) && (m_contains_abs_sample == other.m_contains_abs_sample)
305 && (m_rank_contains_abs_sample == other.m_rank_contains_abs_sample);
306 }
307
309 bool operator!=(nearest_neighbour_dictionary const & other) const noexcept
310 {
311 return !(*this == other);
312 }
313};
314
315} // end namespace sdsl
316
317#endif // end file
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
int_vector_size_type size_type
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Nearest neighbour dictionary for sparse uniform sets (described in Geary et al., A Simple Optimal Rep...
size_type next(size_type i) const
Answers "next occurence of one" queries for the supported bit_vector.
bool operator==(nearest_neighbour_dictionary const &other) const noexcept
Equality operator.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the nearest_neighbour_dictionary.
void load(std::istream &in)
Loads the nearest_neighbour_dictionary.
nearest_neighbour_dictionary & operator=(nearest_neighbour_dictionary &&nnd)
nearest_neighbour_dictionary(nearest_neighbour_dictionary const &nnd)
Copy constructor.
nearest_neighbour_dictionary & operator=(nearest_neighbour_dictionary const &nnd)
nearest_neighbour_dictionary(nearest_neighbour_dictionary &&nnd)
Move constructor.
nearest_neighbour_dictionary(bit_vector const &v)
Constructor.
size_type select(size_type i) const
Answers select queries for the supported bit_vector.
size_type prev(size_type i) const
Answers "previous occurence of one" queries for the supported bit_vector.
bool operator!=(nearest_neighbour_dictionary const &other) const noexcept
Inequality operator.
A rank structure proposed by Sebastiano Vigna.
void load(std::istream &in, int_vector< 1 > const *v=nullptr)
Loads the rank_support.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes rank_support.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
void set_vector(bit_vector const *v=nullptr)
Sets the supported bit_vector to the given pointer.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
Namespace for the succinct data structure library.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
rank_support_v.hpp contains rank_support_v.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:652
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.