SDSL 3.0.1
Succinct Data Structure Library
Loading...
Searching...
No Matches
nearest_neighbour_dictionary.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
9#define INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
10
11#include <stdexcept>
12#include <string>
13
14#include <sdsl/int_vector.hpp>
15#include <sdsl/rank_support.hpp>
16#include <sdsl/util.hpp>
17#include <sdsl/wm_int.hpp>
18
20namespace sdsl
21{
22
25
36// TODO: implement an iterator for the ones in the nearest neighbour dictionary!!! used in the construction of the
37// balanced parentheses support
38template <uint8_t t_sample_dens>
40{
41 private:
42 static_assert(t_sample_dens != 0, "nearest_neighbour_dictionary: t_sample_dens should not be equal 0!");
43
44 public:
46
47 private:
48 int_vector<> m_abs_samples; // absolute samples array corresponds to array \f$ A_1 \f$ in the paper
49 int_vector<> m_differences; // vector for the differences in between the samples; corresponds to array \f$ A_2 \f$
50 // in the paper
51 size_type m_ones; // corresponds to N in the paper
52 size_type m_size; // corresponds to M in the paper
53 bit_vector m_contains_abs_sample; // vector which stores for every block of length t_sample_dens of the original
54 // bit_vector if an absolute sample lies in this block.
55 // Corresponds to array \f$ A_3 \f$ in the paper.
56 rank_support_v<> m_rank_contains_abs_sample; // rank support for m_contains_abs_sample. Corresponds to array \f$ A_4
57 // \f$ in the paper.
58 // NOTE: A faster version should store the absolute samples and the differences interleaved
59
60 public:
63 : m_ones(0)
64 , m_size(0)
65 {}
66
68
71 : m_ones(0)
72 , m_size(0)
73 {
74 size_type max_distance_between_two_ones = 0;
75 size_type ones = 0; // counter for the ones in v
76
77 // get maximal distance between to ones in the bit vector
78 // speed this up by broadword computing
79 for (size_type i = 0, last_one_pos_plus_1 = 0; i < v.size(); ++i)
80 {
81 if (v[i])
82 {
83 if (i + 1 - last_one_pos_plus_1 > max_distance_between_two_ones)
84 max_distance_between_two_ones = i + 1 - last_one_pos_plus_1;
85 last_one_pos_plus_1 = i + 1;
86 ++ones;
87 }
88 }
89 m_ones = ones;
90 m_size = v.size();
91 // std::cerr<<ones<<std::endl;
92 // initialize absolute samples m_abs_samples[0]=0
93 m_abs_samples = int_vector<>(m_ones / t_sample_dens + 1, 0, bits::hi(v.size()) + 1);
94 // initialize different values
95 m_differences = int_vector<>(m_ones - m_ones / t_sample_dens, 0, bits::hi(max_distance_between_two_ones) + 1);
96 // initialize m_contains_abs_sample
97 m_contains_abs_sample = bit_vector((v.size() + t_sample_dens - 1) / t_sample_dens, 0);
98 ones = 0;
99 for (size_type i = 0, last_one_pos = 0; i < v.size(); ++i)
100 {
101 if (v[i])
102 {
103 ++ones;
104 if ((ones % t_sample_dens) == 0)
105 { // insert absolute samples
106 m_abs_samples[ones / t_sample_dens] = i;
107 m_contains_abs_sample[i / t_sample_dens] = 1;
108 }
109 else
110 {
111 m_differences[ones - ones / t_sample_dens - 1] = i - last_one_pos;
112 }
113 last_one_pos = i;
114 }
115 }
116 util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample);
117 }
118
121 : m_abs_samples(nnd.m_abs_samples)
122 , m_differences(nnd.m_differences)
123 , m_ones(nnd.m_ones)
124 , m_size(nnd.m_size)
125 , m_contains_abs_sample(nnd.m_contains_abs_sample)
126 , m_rank_contains_abs_sample(nnd.m_rank_contains_abs_sample)
127 {
128 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
129 }
130
133
136
138 {
139 if (*this != &nnd)
140 {
142 *this = std::move(tmp);
143 }
144 return *this;
145 }
146
148 {
149 if (this != &nnd)
150 {
151 m_abs_samples = std::move(nnd.m_abs_samples);
152 m_differences = std::move(nnd.m_differences);
153 m_ones = std::move(nnd.m_ones);
154 m_size = std::move(nnd.m_size);
155 m_contains_abs_sample = std::move(nnd.m_contains_abs_sample);
156 m_rank_contains_abs_sample = std::move(nnd.m_rank_contains_abs_sample);
157 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
158 }
159 return *this;
160 }
161
163
168 {
169 assert(idx <= m_size);
170 size_type r = m_rank_contains_abs_sample.rank(idx / t_sample_dens); //
171 size_type result = r * t_sample_dens;
172 size_type i = m_abs_samples[r];
173 while (++result <= m_ones)
174 {
175 if ((result % t_sample_dens) == 0) { i = m_abs_samples[result / t_sample_dens]; }
176 else
177 {
178 i = i + m_differences[result - result / t_sample_dens - 1];
179 }
180 if (i >= idx) return result - 1;
181 }
182 return result - 1;
183 };
184
186
191 {
192 assert(i > 0 and i <= m_ones);
193 size_type j = i / t_sample_dens;
194 size_type result = m_abs_samples[j];
195 j = j * t_sample_dens - j;
196 for (size_type end = j + (i % t_sample_dens); j < end; ++j) { result += m_differences[j]; }
197 return result;
198 }
199
201
207 {
208 size_type r = rank(i + 1);
209 assert(r > 0);
210 return select(r);
211 }
219 {
220 size_type r = rank(i);
221 assert(r < m_ones);
222 return select(r + 1);
223 }
224
225 size_type size() const { return m_size; }
226
227 size_type ones() const { return m_ones; }
228
230
232 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
233 {
234 size_type written_bytes = 0;
235 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
236 written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
237 written_bytes += m_differences.serialize(out, child, "differences");
238 written_bytes += write_member(m_ones, out, child, "ones");
239 written_bytes += write_member(m_size, out, child, "size");
240 written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
241 written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
242 structure_tree::add_size(child, written_bytes);
243 return written_bytes;
244 }
245
247
249 void load(std::istream & in)
250 {
251 m_abs_samples.load(in);
252 m_differences.load(in);
253 read_member(m_ones, in);
254 read_member(m_size, in);
255 m_contains_abs_sample.load(in);
256 m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
257 }
258
259 template <typename archive_t>
260 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
261 {
262 ar(CEREAL_NVP(m_ones));
263 ar(CEREAL_NVP(m_size));
264 ar(CEREAL_NVP(m_abs_samples));
265 ar(CEREAL_NVP(m_differences));
266 ar(CEREAL_NVP(m_contains_abs_sample));
267 ar(CEREAL_NVP(m_rank_contains_abs_sample));
268 }
269
270 template <typename archive_t>
271 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
272 {
273 ar(CEREAL_NVP(m_ones));
274 ar(CEREAL_NVP(m_size));
275 ar(CEREAL_NVP(m_abs_samples));
276 ar(CEREAL_NVP(m_differences));
277 ar(CEREAL_NVP(m_contains_abs_sample));
278 ar(CEREAL_NVP(m_rank_contains_abs_sample));
279 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
280 }
281
283 bool operator==(nearest_neighbour_dictionary const & other) const noexcept
284 {
285 return (m_ones == other.m_ones) && (m_size == other.m_size) && (m_abs_samples == other.m_abs_samples) &&
286 (m_differences == other.m_differences) && (m_contains_abs_sample == other.m_contains_abs_sample) &&
287 (m_rank_contains_abs_sample == other.m_rank_contains_abs_sample);
288 }
289
291 bool operator!=(nearest_neighbour_dictionary const & other) const noexcept { return !(*this == other); }
292};
293
294} // end namespace sdsl
295
296#endif // end file
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
int_vector_size_type size_type
Definition: int_vector.hpp:229
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Nearest neighbour dictionary for sparse uniform sets (described in Geary et al., A Simple Optimal Rep...
size_type next(size_type i) const
Answers "next occurence of one" queries for the supported bit_vector.
bool operator==(nearest_neighbour_dictionary const &other) const noexcept
Equality operator.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
nearest_neighbour_dictionary & operator=(const nearest_neighbour_dictionary &nnd)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the nearest_neighbour_dictionary.
void load(std::istream &in)
Loads the nearest_neighbour_dictionary.
nearest_neighbour_dictionary & operator=(nearest_neighbour_dictionary &&nnd)
nearest_neighbour_dictionary(const bit_vector &v)
Constructor.
nearest_neighbour_dictionary(nearest_neighbour_dictionary &&nnd)
Move constructor.
nearest_neighbour_dictionary(const nearest_neighbour_dictionary &nnd)
Copy constructor.
size_type select(size_type i) const
Answers select queries for the supported bit_vector.
size_type prev(size_type i) const
Answers "previous occurence of one" queries for the supported bit_vector.
bool operator!=(nearest_neighbour_dictionary const &other) const noexcept
Inequality operator.
A rank structure proposed by Sebastiano Vigna.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes rank_support.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
void set_vector(const bit_vector *v=nullptr)
Sets the supported bit_vector to the given pointer.
void load(std::istream &in, const int_vector< 1 > *v=nullptr)
Loads the rank_support.
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wm_int.hpp contains a specialized class for a wavelet tree for sequences over large alphabets.