4#ifndef INCLUDED_SDSL_WT_ALGORITHM
5#define INCLUDED_SDSL_WT_ALGORITHM
24template <
typename t_wt>
25struct has_interval_symbols;
26template <
typename t_wt,
bool t_has_
interval_symbols>
27struct _interval_symbols_wt;
28template <
typename,
typename T>
42std::vector<std::pair<typename t_wt::value_type, typename t_wt::size_type>>
43intersect(t_wt
const & wt, std::vector<range_type>
const & ranges,
typename t_wt::size_type t = 0)
46 using size_type =
typename t_wt::size_type;
47 using value_type =
typename t_wt::value_type;
48 using node_type =
typename t_wt::node_type;
49 using pnvr_type = std::pair<node_type, range_vec_type>;
50 typedef std::stack<pnvr_type> stack_type;
53 "intersect requires t_wt to have expand(const node_type&)");
55 using p_t = std::pair<value_type, size_type>;
58 auto push_node = [&t](stack_type & s, node_type & child,
range_vec_type & child_range)
60 auto end = std::remove_if(child_range.begin(),
66 if (end > child_range.begin() + t - 1)
68 s.emplace(pnvr_type(child,
range_vec_type(child_range.begin(), end)));
75 t = (t == 0) ? ranges.
size() : t;
77 std::stack<pnvr_type> stack;
78 stack.emplace(pnvr_type(wt.root(), ranges));
80 while (!stack.empty())
82 pnvr_type x = stack.top();
85 if (wt.is_leaf(x.first))
87 auto const & iv = x.second;
90 auto freq = std::accumulate(iv.begin(),
95 return acc + (r[1] - r[0] + 1);
97 res.emplace_back(wt.sym(x.first), freq);
102 auto child = wt.expand(x.first);
103 auto child_ranges = wt.expand(x.first, x.second);
105 push_node(stack, get<1>(child), get<1>(child_ranges));
106 push_node(stack, get<0>(child), get<0>(child_ranges));
119std::pair<typename t_wt::value_type, typename t_wt::size_type>
120quantile_freq(t_wt
const & wt,
typename t_wt::size_type lb,
typename t_wt::size_type rb,
typename t_wt::size_type q)
122 static_assert(t_wt::lex_ordered,
"quantile_freq requires a lex_ordered WT");
124 using node_type =
typename t_wt::node_type;
126 "quantile_freq requires t_wt to have expand(const node_type&)");
128 node_type v = wt.root();
131 while (!wt.is_leaf(v))
133 auto child = wt.expand(v);
134 auto child_ranges = wt.expand(v, r);
135 auto num_zeros =
size(get<0>(child_ranges));
141 r = get<1>(child_ranges);
146 r = get<0>(child_ranges);
149 return {wt.sym(v),
size(r)};
155 typename t_wt::size_type & k,
156 std::vector<typename t_wt::value_type> & cs,
157 std::vector<typename t_wt::size_type> & rank_c_i,
158 std::vector<typename t_wt::size_type> & rank_c_j,
159 const typename t_wt::node_type & v)
165 rank_c_j[k] = r[1] + 1;
170 auto child = wt.expand(v);
171 auto child_ranges = wt.expand(v, r);
172 if (!
empty(get<0>(child_ranges)))
176 if (!
empty(get<1>(child_ranges)))
185 typename t_wt::size_type i,
186 typename t_wt::size_type j,
187 typename t_wt::size_type & k,
188 std::vector<typename t_wt::value_type> & cs,
189 std::vector<typename t_wt::size_type> & rank_c_i,
190 std::vector<typename t_wt::size_type> & rank_c_j)
193 assert(i <= j and j <= wt.size());
197 auto res = wt.inverse_select(i);
199 rank_c_i[0] = res.first;
200 rank_c_j[0] = res.first + 1;
233 typename t_wt::size_type i,
234 typename t_wt::size_type j,
235 typename t_wt::size_type & k,
236 std::vector<typename t_wt::value_type> & cs,
237 std::vector<typename t_wt::size_type> & rank_c_i,
238 std::vector<typename t_wt::size_type> & rank_c_j)
256template <
typename t_wt>
259 template <
typename T>
260 static constexpr auto check(T *) ->
typename std::is_same<
261 decltype(std::declval<T>().interval_symbols(std::declval<typename T::size_type>(),
262 std::declval<typename T::size_type>(),
263 std::declval<typename T::size_type &>(),
264 std::declval<std::vector<typename T::value_type> &>(),
265 std::declval<std::vector<typename T::size_type> &>(),
266 std::declval<std::vector<typename T::size_type> &>())),
269 return std::true_type();
272 static constexpr std::false_type
check(...)
274 return std::false_type();
277 static constexpr bool value = type::value;
280template <
typename t_wt,
bool t_has_
interval_symbols>
286 static void call(t_wt
const & wt,
290 std::vector<value_type> & cs,
291 std::vector<size_type> & rank_c_i,
292 std::vector<size_type> & rank_c_j)
294 wt.interval_symbols(i, j, k, cs, rank_c_i, rank_c_j);
298template <
typename t_wt>
308 std::vector<value_type> &,
309 std::vector<size_type> &,
310 std::vector<size_type> &)
314template <
typename,
typename T>
317 static_assert(std::integral_constant<T, false>::value,
"Second template parameter needs to be of function type.");
320template <
typename t_wt,
typename t_ret,
typename... t_args>
323 template <
typename T>
325 typename std::is_same<decltype(std::declval<T>().expand(std::declval<t_args>()...)), t_ret>::type
327 return std::true_type();
330 static constexpr std::false_type
check(...)
332 return std::false_type();
334 typedef decltype(check<t_wt>(
nullptr))
type;
335 static constexpr bool value = type::value;
338template <
typename t_wt>
341 template <
typename T>
342 static constexpr auto check(T *) ->
typename std::is_same<
343 decltype(std::declval<T>().range_search_2d(
344 std::declval<typename T::size_type>(),
345 std::declval<typename T::size_type>(),
346 std::declval<typename T::value_type>(),
347 std::declval<typename T::value_type>(),
349 std::pair<
typename T::size_type, std::vector<std::pair<typename T::value_type, typename T::size_type>>>>
::type
351 return std::true_type();
355 static constexpr std::false_type
check(...)
357 return std::false_type();
360 static constexpr bool value = type::value;
370std::pair<bool, typename t_wt::value_type>
_symbol_lte(t_wt
const & wt,
typename t_wt::value_type c)
372 if (((1ULL) << (wt.max_level)) <= c)
377 auto node = wt.root();
378 auto predecessor_subtree = node;
379 uint64_t mask = (1ULL) << (wt.max_level - 1);
380 while (!wt.is_leaf(node))
382 auto children = wt.expand(node);
383 auto left_child = std::get<0>(children);
384 auto right_child = std::get<1>(children);
385 if (c & (mask >> node.level))
387 if (right_child.size)
392 predecessor_subtree = left_child;
410 if (predecessor_subtree == wt.root())
416 node = predecessor_subtree;
421 return {
true, node.sym};
431std::pair<bool, typename t_wt::value_type>
_symbol_gte(t_wt
const & wt,
typename t_wt::value_type c)
433 if (((1ULL) << (wt.max_level)) <= c)
438 auto node = wt.root();
439 auto successor_subtree = node;
440 uint64_t mask = (1ULL) << (wt.max_level - 1);
441 while (!wt.is_leaf(node))
443 auto children = wt.expand(node);
444 auto left_child = std::get<0>(children);
445 auto right_child = std::get<1>(children);
446 if (c & (mask >> node.level))
448 if (right_child.size)
454 if (successor_subtree == wt.root())
460 node = successor_subtree;
469 if (right_child.size)
471 successor_subtree = right_child;
482 return {
true, node.sym};
485template <
class t_wt,
bool t_has_
interval_symbols>
492 return wt.symbol_gte(c);
497 return wt.symbol_lte(c);
517template <
typename t_wt>
520 template <
typename T>
522 typename std::is_same<decltype(std::declval<T>().symbol_gte(std::declval<typename T::value_type>())),
523 std::pair<bool, typename T::value_type>>
::type
525 return std::true_type();
529 static constexpr std::false_type
check(...)
531 return std::false_type();
534 static constexpr bool value = type::value;
544std::pair<bool, typename t_wt::value_type>
symbol_lte(t_wt
const & wt,
typename t_wt::value_type c)
546 static_assert(t_wt::lex_ordered,
"symbols_lte requires a lex_ordered WT");
559std::pair<bool, typename t_wt::value_type>
symbol_gte(t_wt
const & wt,
typename t_wt::value_type c)
561 static_assert(t_wt::lex_ordered,
"symbols_gte requires a lex_ordered WT");
577 typename t_wt::size_type x_i,
578 typename t_wt::size_type x_j,
579 typename t_wt::value_type y_i,
580 typename t_wt::value_type y_j)
582 static_assert(t_wt::lex_ordered,
"restricted_unique_range_values requires a lex_ordered WT");
584 std::vector<typename t_wt::value_type> unique_values;
587 if (x_j > wt.size() - 1)
589 if ((x_i > x_j) || (y_i > y_j))
591 return unique_values;
596 if (!lower_y_bound.first || !upper_y_bound.first || (lower_y_bound.second > upper_y_bound.second))
598 return unique_values;
601 auto lower_y_bound_path = wt.path(lower_y_bound.second);
602 auto upper_y_bound_path = wt.path(upper_y_bound.second);
604 auto compare_path = [](uint64_t node_path, uint64_t node_path_len, std::pair<uint64_t, uint64_t> bound_path) ->
int
606 auto bound_path_len = bound_path.first;
607 auto bound_path_val = bound_path.second;
609 if (bound_path_len > node_path_len)
610 bound_path_val = bound_path_val >> (bound_path_len - node_path_len);
611 if (bound_path_len < node_path_len)
612 bound_path_val = bound_path_val << (node_path_len - bound_path_len);
614 if (node_path < bound_path_val)
616 if (node_path > bound_path_val)
621 std::stack<std::tuple<typename t_wt::node_type, sdsl::range_type, uint64_t, uint64_t>> stack;
623 stack.emplace(wt.root(), initial_range, 0, 0);
624 while (!stack.empty())
626 auto node_data = stack.top();
628 auto node = std::get<0>(node_data);
629 auto range = std::get<1>(node_data);
630 auto node_path = std::get<2>(node_data);
631 auto node_level = std::get<3>(node_data);
632 if (wt.is_leaf(node))
634 unique_values.emplace_back(wt.sym(node));
638 auto children = wt.expand(node);
639 auto left_path = node_path << 1ULL;
640 auto right_path = (node_path << 1ULL) | 1ULL;
641 auto child_ranges = wt.expand(node, range);
642 if (compare_path(right_path, node_level + 1, upper_y_bound_path) < 1)
644 auto right_child = std::get<1>(children);
645 auto right_range = std::get<1>(child_ranges);
647 stack.emplace(right_child, right_range, right_path, node_level + 1);
649 if (compare_path(left_path, node_level + 1, lower_y_bound_path) > -1)
651 auto left_child = std::get<0>(children);
652 auto left_range = std::get<0>(child_ranges);
654 stack.emplace(left_child, left_range, left_path, node_level + 1);
659 return unique_values;
672template <
typename t_wt,
typename T =
void>
678 value = t_expr::value
682template <
typename t_wt>
688 value = t_expr::value
bits.hpp contains the sdsl::bits class.
size_type size() const noexcept
The number of elements in the int_vector.
Namespace for the succinct data structure library.
std::vector< std::pair< typename t_wt::value_type, typename t_wt::size_type > > intersect(t_wt const &wt, std::vector< range_type > const &ranges, typename t_wt::size_type t=0)
Intersection of elements in WT[s_0,e_0], WT[s_1,e_1],...,WT[s_k,e_k].
void _interval_symbols(t_wt const &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
std::pair< bool, typename t_wt::value_type > _symbol_gte(t_wt const &wt, typename t_wt::value_type c)
Returns for a symbol c the next larger or equal symbol in the WT.
std::pair< bool, typename t_wt::value_type > symbol_lte(t_wt const &wt, typename t_wt::value_type c)
Returns for a symbol c the previous smaller or equal symbol in the WT.
std::pair< typename t_wt::value_type, typename t_wt::size_type > quantile_freq(t_wt const &wt, typename t_wt::size_type lb, typename t_wt::size_type rb, typename t_wt::size_type q)
Returns the q-th smallest element and its frequency in wt[lb..rb].
std::array< int_vector<>::size_type, 2 > range_type
void interval_symbols(t_wt const &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
void _interval_symbols_rec(t_wt const &wt, range_type r, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j, const typename t_wt::node_type &v)
bool empty(range_type const &r)
Empty range check.
int_vector ::size_type size(range_type const &r)
Size of a range.
std::pair< bool, typename t_wt::value_type > _symbol_lte(t_wt const &wt, typename t_wt::value_type c)
Returns for a symbol c the previous smaller or equal symbol in the WT.
std::vector< range_type > range_vec_type
std::pair< bool, typename t_wt::value_type > symbol_gte(t_wt const &wt, typename t_wt::value_type c)
Returns for a symbol c the next larger or equal symbol in the WT.
std::vector< typename t_wt::value_type > restricted_unique_range_values(t_wt const &wt, typename t_wt::size_type x_i, typename t_wt::size_type x_j, typename t_wt::value_type y_i, typename t_wt::value_type y_j)
Returns for a x range [x_i,x_j] and a value range [y_i,y_j] all unique y values occuring in [x_i,...
t_wt::value_type value_type
static void call(t_wt const &, size_type, size_type, size_type &, std::vector< value_type > &, std::vector< size_type > &, std::vector< size_type > &)
t_wt::size_type size_type
static void call(t_wt const &wt, size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j)
t_wt::size_type size_type
t_wt::value_type value_type
t_wt::value_type value_type
static std::pair< bool, value_type > call_symbol_lte(t_wt const &wt, value_type c)
static std::pair< bool, value_type > call_symbol_gte(t_wt const &wt, value_type c)
t_wt::value_type value_type
static std::pair< bool, value_type > call_symbol_gte(t_wt const &wt, value_type c)
static std::pair< bool, value_type > call_symbol_lte(t_wt const &wt, value_type c)
static constexpr uint64_t all_set
64bit mask with all bits set to 1.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().expand(std::declval< t_args >()...)), t_ret >::type
decltype(check< t_wt >(nullptr)) type
static constexpr bool value
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().interval_symbols(std::declval< typename T::size_type >(), std::declval< typename T::size_type >(), std::declval< typename T::size_type & >(), std::declval< std::vector< typename T::value_type > & >(), std::declval< std::vector< typename T::size_type > & >(), std::declval< std::vector< typename T::size_type > & >())), void >::type
decltype(check< t_wt >(nullptr)) type
static constexpr bool value
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().range_search_2d(std::declval< typename T::size_type >(), std::declval< typename T::size_type >(), std::declval< typename T::value_type >(), std::declval< typename T::value_type >(), false)), std::pair< typename T::size_type, std::vector< std::pair< typename T::value_type, typename T::size_type > > > >::type
decltype(check< t_wt >(nullptr)) type
static constexpr bool value
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().symbol_gte(std::declval< typename T::value_type >())), std::pair< bool, typename T::value_type > >::type
decltype(check< t_wt >(nullptr)) type