4#ifndef INCLUDED_SDSL_WT_ALGORITHM
5#define INCLUDED_SDSL_WT_ALGORITHM
15template <
typename t_wt>
16struct has_interval_symbols;
18template <
typename t_wt,
bool t_has_
interval_symbols>
19struct _interval_symbols_wt;
21template <
typename,
typename T>
35std::vector<std::pair<typename t_wt::value_type, typename t_wt::size_type>>
36intersect(
const t_wt & wt,
const std::vector<range_type> & ranges,
typename t_wt::size_type t = 0)
39 using size_type =
typename t_wt::size_type;
40 using value_type =
typename t_wt::value_type;
41 using node_type =
typename t_wt::node_type;
42 using pnvr_type = std::pair<node_type, range_vec_type>;
43 typedef std::stack<pnvr_type> stack_type;
46 "intersect requires t_wt to have expand(const node_type&)");
48 using p_t = std::pair<value_type, size_type>;
51 auto push_node = [&t](stack_type & s, node_type & child,
range_vec_type & child_range) {
52 auto end = std::remove_if(child_range.begin(), child_range.end(), [&](
const range_type & x) {
55 if (end > child_range.begin() + t - 1)
57 s.emplace(pnvr_type(child,
range_vec_type(child_range.begin(), end)));
61 if (ranges.empty())
return res;
63 t = (t == 0) ? ranges.size() : t;
65 std::stack<pnvr_type> stack;
66 stack.emplace(pnvr_type(wt.root(), ranges));
68 while (!stack.empty())
70 pnvr_type x = stack.top();
73 if (wt.is_leaf(x.first))
75 const auto & iv = x.second;
78 auto freq = std::accumulate(iv.begin(), iv.end(), 0ULL, [](size_type acc,
const range_type & r) {
79 return acc + (r[1] - r[0] + 1);
81 res.emplace_back(wt.sym(x.first), freq);
86 auto child = wt.expand(x.first);
87 auto child_ranges = wt.expand(x.first, x.second);
89 push_node(stack, get<1>(child), get<1>(child_ranges));
90 push_node(stack, get<0>(child), get<0>(child_ranges));
103std::pair<typename t_wt::value_type, typename t_wt::size_type>
quantile_freq(
const t_wt & wt,
104 typename t_wt::size_type lb,
105 typename t_wt::size_type rb,
106 typename t_wt::size_type q)
108 static_assert(t_wt::lex_ordered,
"quantile_freq requires a lex_ordered WT");
110 using node_type =
typename t_wt::node_type;
112 "quantile_freq requires t_wt to have expand(const node_type&)");
114 node_type v = wt.root();
117 while (!wt.is_leaf(v))
119 auto child = wt.expand(v);
120 auto child_ranges = wt.expand(v, r);
121 auto num_zeros =
size(get<0>(child_ranges));
127 r = get<1>(child_ranges);
132 r = get<0>(child_ranges);
135 return { wt.sym(v),
size(r) };
141 typename t_wt::size_type & k,
142 std::vector<typename t_wt::value_type> & cs,
143 std::vector<typename t_wt::size_type> & rank_c_i,
144 std::vector<typename t_wt::size_type> & rank_c_j,
145 const typename t_wt::node_type & v)
151 rank_c_j[k] = r[1] + 1;
156 auto child = wt.expand(v);
157 auto child_ranges = wt.expand(v, r);
158 if (!
empty(get<0>(child_ranges)))
162 if (!
empty(get<1>(child_ranges)))
171 typename t_wt::size_type i,
172 typename t_wt::size_type j,
173 typename t_wt::size_type & k,
174 std::vector<typename t_wt::value_type> & cs,
175 std::vector<typename t_wt::size_type> & rank_c_i,
176 std::vector<typename t_wt::size_type> & rank_c_j)
179 assert(i <= j and j <= wt.size());
183 auto res = wt.inverse_select(i);
185 rank_c_i[0] = res.first;
186 rank_c_j[0] = res.first + 1;
219 typename t_wt::size_type i,
220 typename t_wt::size_type j,
221 typename t_wt::size_type & k,
222 std::vector<typename t_wt::value_type> & cs,
223 std::vector<typename t_wt::size_type> & rank_c_i,
224 std::vector<typename t_wt::size_type> & rank_c_j)
242template <
typename t_wt>
245 template <
typename T>
246 static constexpr auto
247 check(T *) ->
typename std::is_same<decltype(std::declval<T>().interval_symbols(std::declval<typename T::size_type>(),
248 std::declval<typename T::size_type>(),
249 std::declval<typename T::size_type &>(),
250 std::declval<std::vector<typename T::value_type> &>(),
251 std::declval<std::vector<typename T::size_type> &>(),
252 std::declval<std::vector<typename T::size_type> &>())),
255 return std::true_type();
258 static constexpr std::false_type
check(...)
260 return std::false_type();
262 typedef decltype(check<t_wt>(
nullptr))
type;
263 static constexpr bool value = type::value;
266template <
typename t_wt,
bool t_has_
interval_symbols>
272 static void call(
const t_wt & wt,
276 std::vector<value_type> & cs,
277 std::vector<size_type> & rank_c_i,
278 std::vector<size_type> & rank_c_j)
280 wt.interval_symbols(i, j, k, cs, rank_c_i, rank_c_j);
284template <
typename t_wt>
294 std::vector<value_type> &,
295 std::vector<size_type> &,
296 std::vector<size_type> &)
300template <
typename,
typename T>
303 static_assert(std::integral_constant<T, false>::value,
"Second template parameter needs to be of function type.");
306template <
typename t_wt,
typename t_ret,
typename... t_args>
309 template <
typename T>
310 static constexpr auto
311 check(T *) ->
typename std::is_same<decltype(std::declval<T>().expand(std::declval<t_args>()...)), t_ret>::type
313 return std::true_type();
316 static constexpr std::false_type
check(...)
318 return std::false_type();
320 typedef decltype(check<t_wt>(
nullptr))
type;
321 static constexpr bool value = type::value;
324template <
typename t_wt>
327 template <
typename T>
328 static constexpr auto
329 check(T *) ->
typename std::is_same<decltype(std::declval<T>().range_search_2d(
330 std::declval<typename T::size_type>(),
331 std::declval<typename T::size_type>(),
332 std::declval<typename T::value_type>(),
333 std::declval<typename T::value_type>(),
335 std::pair<
typename T::size_type,
336 std::vector<std::pair<
typename T::value_type,
337 typename T::size_type>>>>
::type
339 return std::true_type();
343 static constexpr std::false_type
check(...)
345 return std::false_type();
347 typedef decltype(check<t_wt>(
nullptr))
type;
348 static constexpr bool value = type::value;
358std::pair<bool, typename t_wt::value_type>
_symbol_lte(
const t_wt & wt,
typename t_wt::value_type c)
360 if (((1ULL) << (wt.max_level)) <= c)
365 auto node = wt.root();
366 auto predecessor_subtree = node;
367 uint64_t mask = (1ULL) << (wt.max_level - 1);
368 while (!wt.is_leaf(node))
370 auto children = wt.expand(node);
371 auto left_child = std::get<0>(children);
372 auto right_child = std::get<1>(children);
373 if (c & (mask >> node.level))
375 if (right_child.size)
380 predecessor_subtree = left_child;
392 if (left_child.size) { node = left_child; }
395 if (predecessor_subtree == wt.root())
401 node = predecessor_subtree;
406 return {
true, node.sym };
416std::pair<bool, typename t_wt::value_type>
_symbol_gte(
const t_wt & wt,
typename t_wt::value_type c)
418 if (((1ULL) << (wt.max_level)) <= c)
423 auto node = wt.root();
424 auto successor_subtree = node;
425 uint64_t mask = (1ULL) << (wt.max_level - 1);
426 while (!wt.is_leaf(node))
428 auto children = wt.expand(node);
429 auto left_child = std::get<0>(children);
430 auto right_child = std::get<1>(children);
431 if (c & (mask >> node.level))
433 if (right_child.size) { node = right_child; }
436 if (successor_subtree == wt.root())
442 node = successor_subtree;
451 if (right_child.size)
453 successor_subtree = right_child;
464 return {
true, node.sym };
467template <
class t_wt,
bool t_has_
interval_symbols>
487template <
typename t_wt>
490 template <
typename T>
491 static constexpr auto
492 check(T *) ->
typename std::is_same<decltype(std::declval<T>().symbol_gte(std::declval<typename T::value_type>())),
493 std::pair<bool, typename T::value_type>>
::type
495 return std::true_type();
499 static constexpr std::false_type
check(...)
501 return std::false_type();
503 typedef decltype(check<t_wt>(
nullptr))
type;
504 static constexpr bool value = type::value;
514std::pair<bool, typename t_wt::value_type>
symbol_lte(
const t_wt & wt,
typename t_wt::value_type c)
516 static_assert(t_wt::lex_ordered,
"symbols_lte requires a lex_ordered WT");
529std::pair<bool, typename t_wt::value_type>
symbol_gte(
const t_wt & wt,
typename t_wt::value_type c)
531 static_assert(t_wt::lex_ordered,
"symbols_gte requires a lex_ordered WT");
547 typename t_wt::size_type x_i,
548 typename t_wt::size_type x_j,
549 typename t_wt::value_type y_i,
550 typename t_wt::value_type y_j)
552 static_assert(t_wt::lex_ordered,
"restricted_unique_range_values requires a lex_ordered WT");
554 std::vector<typename t_wt::value_type> unique_values;
557 if (x_j > wt.size() - 1) x_j = wt.size() - 1;
558 if ((x_i > x_j) || (y_i > y_j)) {
return unique_values; }
562 if (!lower_y_bound.first || !upper_y_bound.first || (lower_y_bound.second > upper_y_bound.second))
564 return unique_values;
567 auto lower_y_bound_path = wt.path(lower_y_bound.second);
568 auto upper_y_bound_path = wt.path(upper_y_bound.second);
570 auto compare_path = [](uint64_t node_path,
571 uint64_t node_path_len,
572 std::pair<uint64_t, uint64_t> bound_path) ->
int {
573 auto bound_path_len = bound_path.first;
574 auto bound_path_val = bound_path.second;
576 if (bound_path_len > node_path_len) bound_path_val = bound_path_val >> (bound_path_len - node_path_len);
577 if (bound_path_len < node_path_len) bound_path_val = bound_path_val << (node_path_len - bound_path_len);
579 if (node_path < bound_path_val)
return -1;
580 if (node_path > bound_path_val)
return 1;
584 std::stack<std::tuple<typename t_wt::node_type, sdsl::range_type, uint64_t, uint64_t>> stack;
586 stack.emplace(wt.root(), initial_range, 0, 0);
587 while (!stack.empty())
589 auto node_data = stack.top();
591 auto node = std::get<0>(node_data);
592 auto range = std::get<1>(node_data);
593 auto node_path = std::get<2>(node_data);
594 auto node_level = std::get<3>(node_data);
595 if (wt.is_leaf(node)) { unique_values.emplace_back(wt.sym(node)); }
598 auto children = wt.expand(node);
599 auto left_path = node_path << 1ULL;
600 auto right_path = (node_path << 1ULL) | 1ULL;
601 auto child_ranges = wt.expand(node, range);
602 if (compare_path(right_path, node_level + 1, upper_y_bound_path) < 1)
604 auto right_child = std::get<1>(children);
605 auto right_range = std::get<1>(child_ranges);
606 if (!
sdsl::empty(right_range)) stack.emplace(right_child, right_range, right_path, node_level + 1);
608 if (compare_path(left_path, node_level + 1, lower_y_bound_path) > -1)
610 auto left_child = std::get<0>(children);
611 auto left_range = std::get<0>(child_ranges);
612 if (!
sdsl::empty(left_range)) stack.emplace(left_child, left_range, left_path, node_level + 1);
617 return unique_values;
630template <
typename t_wt,
typename T =
void>
636 value = t_expr::value
640template <
typename t_wt>
646 value = t_expr::value
Namespace for the succinct data structure library.
std::pair< bool, typename t_wt::value_type > _symbol_lte(const t_wt &wt, typename t_wt::value_type c)
Returns for a symbol c the previous smaller or equal symbol in the WT.
void interval_symbols(const t_wt &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
std::pair< bool, typename t_wt::value_type > symbol_lte(const t_wt &wt, typename t_wt::value_type c)
Returns for a symbol c the previous smaller or equal symbol in the WT.
std::vector< std::pair< typename t_wt::value_type, typename t_wt::size_type > > intersect(const t_wt &wt, const std::vector< range_type > &ranges, typename t_wt::size_type t=0)
Intersection of elements in WT[s_0,e_0], WT[s_1,e_1],...,WT[s_k,e_k].
std::pair< bool, typename t_wt::value_type > symbol_gte(const t_wt &wt, typename t_wt::value_type c)
Returns for a symbol c the next larger or equal symbol in the WT.
void _interval_symbols(const t_wt &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
bool empty(const range_type &r)
Empty range check.
std::pair< typename t_wt::value_type, typename t_wt::size_type > quantile_freq(const t_wt &wt, typename t_wt::size_type lb, typename t_wt::size_type rb, typename t_wt::size_type q)
Returns the q-th smallest element and its frequency in wt[lb..rb].
std::array< int_vector<>::size_type, 2 > range_type
std::vector< typename t_wt::value_type > restricted_unique_range_values(const t_wt &wt, typename t_wt::size_type x_i, typename t_wt::size_type x_j, typename t_wt::value_type y_i, typename t_wt::value_type y_j)
Returns for a x range [x_i,x_j] and a value range [y_i,y_j] all unique y values occuring in [x_i,...
void _interval_symbols_rec(const t_wt &wt, range_type r, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j, const typename t_wt::node_type &v)
std::pair< bool, typename t_wt::value_type > _symbol_gte(const t_wt &wt, typename t_wt::value_type c)
Returns for a symbol c the next larger or equal symbol in the WT.
std::vector< range_type > range_vec_type
int_vector ::size_type size(const range_type &r)
Size of a range.
static void call(const t_wt &, size_type, size_type, size_type &, std::vector< value_type > &, std::vector< size_type > &, std::vector< size_type > &)
t_wt::value_type value_type
t_wt::size_type size_type
t_wt::value_type value_type
static void call(const t_wt &wt, size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j)
t_wt::size_type size_type
static std::pair< bool, value_type > call_symbol_lte(const t_wt &wt, value_type c)
t_wt::value_type value_type
static std::pair< bool, value_type > call_symbol_gte(const t_wt &wt, value_type c)
t_wt::value_type value_type
static std::pair< bool, value_type > call_symbol_lte(const t_wt &wt, value_type c)
static std::pair< bool, value_type > call_symbol_gte(const t_wt &wt, value_type c)
static constexpr uint64_t all_set
64bit mask with all bits set to 1.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().expand(std::declval< t_args >()...)), t_ret >::type
decltype(check< t_wt >(nullptr)) type
static constexpr bool value
static constexpr std::false_type check(...)
decltype(check< t_wt >(nullptr)) type
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().interval_symbols(std::declval< typename T::size_type >(), std::declval< typename T::size_type >(), std::declval< typename T::size_type & >(), std::declval< std::vector< typename T::value_type > & >(), std::declval< std::vector< typename T::size_type > & >(), std::declval< std::vector< typename T::size_type > & >())), void >::type
static constexpr bool value
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().range_search_2d(std::declval< typename T::size_type >(), std::declval< typename T::size_type >(), std::declval< typename T::value_type >(), std::declval< typename T::value_type >(), false)), std::pair< typename T::size_type, std::vector< std::pair< typename T::value_type, typename T::size_type > > > >::type
decltype(check< t_wt >(nullptr)) type
static constexpr bool value
static constexpr std::false_type check(...)
decltype(check< t_wt >(nullptr)) type
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().symbol_gte(std::declval< typename T::value_type >())), std::pair< bool, typename T::value_type > >::type