8#ifndef INCLUDED_SDSL_CONSTRUCT_LCP
9#define INCLUDED_SDSL_CONSTRUCT_LCP
62template <u
int8_t t_w
idth>
65 static_assert(t_width == 0 or t_width == 8,
66 "construct_lcp_kasai: width must be `0` for integer alphabet and `8` for byte alphabet");
85 for (size_type i = 0, j = 0, sa_1 = 0, l = 0, n = isa_buf.
size(); i < n; ++i)
94 while (text[i + l] == text[j + l])
107 for (size_type i = sa.
size(); i > 1; --i)
109 sa[i - 1] = sa[i - 2];
133template <u
int8_t t_w
idth>
136 static_assert(t_width == 0 or t_width == 8,
137 "construct_lcp_PHI: width must be `0` for integer alphabet and `8` for byte alphabet");
142 size_type n = sa_buf.
size();
154 for (size_type i = 0, sai_1 = 0; i < n; ++i)
156 size_type sai = sa_buf[i];
167 for (size_type i = 0, l = 0; i < n - 1; ++i)
169 size_type phii = plcp[i];
170 while (text[i + l] == text[phii + l])
177 max_l = std::max(max_l, l);
182 uint8_t lcp_width =
bits::hi(max_l) + 1;
186 size_type buffer_size = 1000000;
190 for (size_type i = 1; i < n; ++i)
192 size_type sai = sa_buf[i];
193 lcp_buf[i] = plcp[sai];
220 size_type n = sa_buf.
size();
227 const uint8_t log_q = 6;
228 const uint32_t q = 1 << log_q;
234 for (size_type i = 0, sai_1 = 0; i < n; ++i)
237 size_type sai = sa_buf[i];
238 if ((sai & modq) == 0)
240 if ((sai >> log_q) >= plcp.
size())
245 plcp[sai >> log_q] = sai_1;
253 for (size_type i = 0, j, k, l = 0; i < plcp.
size(); ++i)
257 while (text[j + l] == text[k + l])
270 size_type buffer_size = 4000000;
277 for (size_type i = 0, sai_1 = 0, l = 0, sai = 0, iq = 0; i < n; ++i)
281 if ((sai & modq) == 0)
283 lcp_out_buf[i] = l = plcp[sai >> log_q];
288 l = plcp[sai >> log_q];
293 while (text[sai + l] == text[sai_1 + l])
299 for (j = 0; j < l; ++j)
301 if (text[sai + j] != text[sai_1 + j])
303 std::cout <<
"lcp[" << i <<
"]=" << l <<
" is two big! " << j <<
" is right!"
304 <<
" sai=" << sai << std::endl;
305 if ((sai & modq) != 0)
306 std::cout <<
" plcp[sai>>log_q]=" << plcp[sai >> log_q] <<
" sai-iq=" << sai - iq <<
" sai=" << sai
307 <<
" sai-iq=" << sai - iq << std::endl;
341#ifdef STUDY_INFORMATIONS
343 size_type matches = 0;
344 size_type comps2 = 0;
349 const size_type n = sa_buf.
size();
350 const size_type m = 254;
359 size_type cnt_c[257] = {0};
360 size_type cnt_cc[257] = {0};
361 size_type cnt_cc2[257] = {0};
362 size_type omitted_c[257] = {0};
363 size_type prev_occ_in_bwt[256] = {0};
364 for (size_type i = 0; i < 256; ++i)
365 prev_occ_in_bwt[i] = (size_type)-1;
366 unsigned char alphabet[257] = {0};
370 size_type m_char_count[2] = {0};
371 uint8_t m_chars[2][256] = {{0}, {0}};
379 size_type done_cnt = 0;
381 for (size_type i = 0; i < n; ++i)
383 ++cnt_c[text[i] + 1];
385 for (
int i = 1; i < 257; ++i)
389 alphabet[sigma++] = (
unsigned char)(i - 1);
391 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
393 alphabet[sigma] =
'\0';
396 size_type sai_1 = sa_buf[0];
397 uint8_t bwti_1 = bwt_buf[0];
398 lcp_sml[cnt_cc[bwti_1]++] = 0;
399 prev_occ_in_bwt[bwti_1] = 0;
400 ++omitted_c[alphabet[0]];
407 size_type rmq_end = 3;
409 const size_type m_mod2 = m % 2;
410 uint8_t cur_c = alphabet[1];
411 size_type big_val = 0;
412 for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt)
414 uint8_t bwti = bwt_buf[i];
416 size_type lf = cnt_cc[bwti];
419 if (cur_c_cnt < sigma)
421 cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1];
425 if (i >= cnt_cc[cur_c])
427 if (bwti == bwti_1 and lf < i)
429 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
432 l += (text[sai_1 + m] == text[sai + m]);
433#ifdef STUDY_INFORMATIONS
434 if ((sai_1 ^ sai) >> 6)
444 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
445#ifdef STUDY_INFORMATIONS
446 if ((sai_1 ^ sai) >> 6)
449 while (text[sai_1 + l] == text[sai + l] and l < m + 1)
452#ifdef STUDY_INFORMATIONS
466 if (i > 10000 and i < 10500 and big_val > 3000)
469 util::clear(lcp_sml);
470 construct_lcp_PHI<8>(config);
477 size_type j = rmq_end;
478 while (x <= rmq_stack[j])
480 rmq_stack[++j] = i + 1;
488 size_type x_pos = prev_occ_in_bwt[bwti] + 2;
490 while (x_pos <= rmq_stack[j])
493 rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2);
504 prev_occ_in_bwt[bwti] = i;
512 if (n > 1000 and nn > 5 * (n / 6))
514 util::clear(lcp_sml);
515 construct_lcp_PHI<8>(config);
520#ifdef STUDY_INFORMATIONS
521 std::cout <<
"# n=" << n <<
" nn=" << nn <<
" nn/n=" << ((double)nn) / n << std::endl;
537 for (size_type i = 0; i < n; ++i)
539 if (lcp_sml_buf[i] >= m)
546 cnt_cc2[0] = cnt_cc[0] = 0;
547 for (size_type i = 1, omitted_sum = 0; i < 257; ++i)
549 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
550 omitted_sum += omitted_c[i - 1];
551 cnt_cc2[i] = cnt_cc[i] - omitted_sum;
555 for (size_type i = 0, i2 = 0; i < n; ++i)
557 uint8_t b = bwt_buf[i];
558 size_type lf_i = cnt_cc[b];
563 lcp_big[i2] = cnt_cc2[b];
589 for (size_type i = 0, i2 = 0; i < n; ++i)
591 uint8_t b = bwt_buf[i];
592 if (lcp_sml_buf[i] >= m)
595 shift_bwt2[i2] = b_1;
615 size_type char_ex[256];
616 for (size_type i = 0; i < 256; ++i)
618 size_type char_occ = 0;
619 size_type m_mod2 = m2 % 2, mm1_mod2 = (m2 + 1) % 2;
620 while (m_char_count[m_mod2] > 0)
625 mm1_mod2 = (m2 + 1) % 2, m_mod2 = m2 % 2;
626 m_char_count[m_mod2] = 0;
628 std::sort(m_chars[mm1_mod2],
629 m_chars[mm1_mod2] + m_char_count[mm1_mod2]);
631 for (size_type mc = 0; mc < m_char_count[mm1_mod2]; ++mc)
633 tLI & mm1_mc_list = m_list[mm1_mod2][m_chars[mm1_mod2][m_char_count[mm1_mod2] - 1 - mc]];
635 while (!mm1_mc_list.empty())
637 size_type i = mm1_mc_list.
front();
638 mm1_mc_list.pop_front();
640 for (size_type k = i; todo2[k]; --k)
642#ifdef STUDY_INFORMATIONS
645 uint8_t b = shift_bwt2[k];
654 for (size_type k = i; todo2[k] and char_occ; ++k)
656#ifdef STUDY_INFORMATIONS
662 size_type p = lcp_big[k];
684 const size_type buffer_size = 1000000;
693 lcp_big_buf.
width());
694 for (size_type i = 0, i2 = 0; i < n; ++i)
696 size_type l = lcp_sml_buf[i];
708#ifdef STUDY_INFORMATIONS
709 std::cout <<
"# racs: " << racs << std::endl;
710 std::cout <<
"# matches: " << matches << std::endl;
711 std::cout <<
"# comps2: " << comps2 << std::endl;
741 const size_type n = sa_buf.
size();
742 const size_type m = 254;
751 size_type cnt_c[257] = {0};
752 size_type cnt_cc[257] = {0};
753 size_type omitted_c[257] = {0};
754 size_type prev_occ_in_bwt[256] = {0};
755 for (size_type i = 0; i < 256; ++i)
756 prev_occ_in_bwt[i] = (size_type)-1;
757 unsigned char alphabet[257] = {0};
766 size_type done_cnt = 0;
768 for (size_type i = 0; i < n; ++i)
770 ++cnt_c[text[i] + 1];
772 for (
int i = 1; i < 257; ++i)
776 alphabet[sigma++] = (
unsigned char)(i - 1);
778 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
780 alphabet[sigma] =
'\0';
783 size_type sai_1 = sa_buf[0];
784 uint8_t bwti_1 = bwt_buf[0];
785 lcp_sml[cnt_cc[bwti_1]++] = 0;
786 prev_occ_in_bwt[bwti_1] = 0;
787 ++omitted_c[alphabet[0]];
794 size_type rmq_end = 3;
796 uint8_t cur_c = alphabet[1];
797 for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt)
799 uint8_t bwti = bwt_buf[i];
801 size_type lf = cnt_cc[bwti];
804 if (cur_c_cnt < sigma)
806 cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1];
810 if (i >= cnt_cc[cur_c])
812 if (bwti == bwti_1 and lf < i)
814 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
817 l += (text[sai_1 + m] == text[sai + m]);
825 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
826 while (text[sai_1 + l] == text[sai + l] and l < m + 1)
840 size_type j = rmq_end;
841 while (x <= rmq_stack[j])
843 rmq_stack[++j] = i + 1;
851 size_type x_pos = prev_occ_in_bwt[bwti] + 2;
853 while (x_pos <= rmq_stack[j])
856 rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2);
865 prev_occ_in_bwt[bwti] = i;
880 size_type sa_n_1 = 0;
885 for (size_type i = 0; i < n; ++i)
887 if (lcp_sml_buf[i] > m)
892 sa_n_1 = sa_buf[n - 1];
896 const size_type bot = sa_n_1;
902 for (size_type i = 0, sai_1 = 0; i < n; ++i)
904 uint8_t b = bwt_buf[i];
905 size_type sai = sa_buf[i];
906 if (lcp_sml_buf[i] > m and b != b_1)
908 phi[todo_rank(sai)] = sai_1;
916 for (size_type i = 0, ii = 0, l = m + 1, p = 0; i < n and ii < nn; ++i)
920 if (i > 0 and todo[i - 1])
924 if ((p = phi[ii]) != bot)
926 while (text[i + l] == text[p + l])
937 for (size_type i = 0, ii = 0; i < n and ii < nn; ++i)
939 if (lcp_sml_buf[i] > m)
941 lcp_big[ii++] = phi[todo_rank(sa_buf[i])];
952 const size_type buffer_size = 1000000;
961 lcp_big_buf.
width());
963 for (size_type i = 0, i2 = 0; i < n; ++i)
965 size_type l = lcp_sml_buf[i];
973 lcp_big_buf.
close(
true);
974 lcp_sml_buf.
close(
true);
996template <
typename t_wt = wt_huff<bit_vector, rank_support_v<>, select_support_scan<1>, select_support_scan<0>>>
1006 uint64_t n = wt_bwt.size();
1011 size_type lcp_value = 0;
1012 size_type lcp_value_offset = 0;
1013 size_type phase = 0;
1015 size_type intervals = 0;
1016 size_type intervals_new = 0;
1018 std::queue<size_type> q;
1019 std::vector<bit_vector> dict(2);
1020 size_type source = 0, target = 1;
1021 bool queue_used =
true;
1022 size_type use_queue_and_wt = n / 2048;
1026 std::vector<unsigned char> cs(wt_bwt.sigma);
1027 std::vector<size_type> rank_c_i(wt_bwt.sigma);
1028 std::vector<size_type> rank_c_j(wt_bwt.sigma);
1032 size_type bb = (n * 20 -
size_in_bytes(wt_bwt) * 8 * 1.25 - 5 * n)
1038 bb = std::min(bb, (size_type)8);
1040 size_type lcp_value_max = (1ULL << bb) - 1;
1041 size_type space_in_bit_for_lcp = n * bb;
1043#ifdef STUDY_INFORMATIONS
1044 std::cout <<
"# l=" << n <<
" b=" << (int)bb <<
" lcp_value_max=" << lcp_value_max
1045 <<
" size_in_bytes(wt_bwt<v,bs,bs>)=" <<
size_in_bytes(wt_bwt) << std::endl;
1056 std::vector<size_type> C;
1064 index_done[0] =
true;
1066 for (size_type i = 0; i < quantity; ++i)
1068 unsigned char c = cs[i];
1069 size_type a_new = C[c] + rank_c_i[i];
1070 size_type b_new = C[c] + rank_c_j[i];
1073 if (!index_done[b_new])
1076 partial_lcp[b_new] = lcp_value;
1077 index_done[b_new] =
true;
1089 if (intervals < use_queue_and_wt && !queue_used)
1092 util::clear(dict[target]);
1097 while (b2 < dict[source].
size())
1099 q.push((a2 - 1) >> 1);
1105 util::clear(dict[source]);
1108 if (intervals >= use_queue_and_wt && queue_used)
1111 dict[source].resize(2 * (n + 1));
1117 dict[source][(q.front() << 1) + 1] = 1;
1119 dict[source][(q.front() << 1)] = 1;
1122 dict[target].resize(2 * (n + 1));
1128 if (intervals < use_queue_and_wt)
1135 size_type a = q.
front();
1137 size_type b = q.
front();
1142 for (size_type i = 0; i < quantity; ++i)
1144 unsigned char c = cs[i];
1145 size_type a_new = C[c] + rank_c_i[i];
1146 size_type b_new = C[c] + rank_c_j[i];
1149 if (!index_done[b_new] and phase == 0)
1151 partial_lcp[b_new] = lcp_value;
1152 index_done[b_new] =
true;
1158 else if (!index_done[b_new])
1160 size_type insert_pos = b_new - ds_rank_support.
rank(b_new);
1161 if (!partial_lcp[insert_pos])
1163 partial_lcp[insert_pos] = lcp_value - lcp_value_offset;
1172 intervals = intervals_new;
1183 while (b2 < dict[source].
size())
1185 interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j);
1186 for (size_type i = 0; i < quantity; ++i)
1188 unsigned char c = cs[i];
1189 size_type a_new = C[c] + rank_c_i[i];
1190 size_type b_new = C[c] + rank_c_j[i];
1192 if (!index_done[b_new] and phase == 0)
1194 partial_lcp[b_new] = lcp_value;
1195 index_done[b_new] =
true;
1197 dict[target][(a_new << 1) + 1] = 1;
1198 dict[target][(b_new << 1)] = 1;
1201 else if (!index_done[b_new])
1203 size_type insert_pos = b_new - ds_rank_support.
rank(b_new);
1204 if (!partial_lcp[insert_pos])
1206 partial_lcp[insert_pos] = lcp_value - lcp_value_offset;
1208 dict[target][(a_new << 1) + 1] = 1;
1209 dict[target][(b_new << 1)] = 1;
1218 std::swap(source, target);
1222 if (lcp_value >= lcp_value_max)
1227 insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset);
1235 util::init_support(ds_rank_support, &index_done);
1238 lcp_value_offset = lcp_value_max - 1;
1239 size_type remaining_lcp_values = index_done.
size() - ds_rank_support.
rank(index_done.
size());
1241 uint8_t int_width_new =
1242 std::min(space_in_bit_for_lcp / remaining_lcp_values, (size_type)
bits::hi(n - 1) + 1);
1243 lcp_value_max = lcp_value_offset + (1ULL << int_width_new);
1244#ifdef STUDY_INFORMATIONS
1245 std::cout <<
"# l=" << remaining_lcp_values <<
" b=" << (int)int_width_new
1246 <<
" lcp_value_max=" << lcp_value_max << std::endl;
1249 partial_lcp.
width(int_width_new);
1250 partial_lcp.
resize(remaining_lcp_values);
1263 insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset);
1290template <
typename t_wt = wt_huff<bit_vector, rank_support_v<>, select_support_scan<1>, select_support_scan<0>>>
1296 size_type buffer_size = 1000000;
1297 size_type lcp_value = 0;
1310 size_type intervals = 0;
1311 size_type intervals_new = 0;
1313 std::queue<size_type> q;
1314 std::vector<bit_vector> dict(2);
1315 size_type source = 0, target = 1;
1316 bool queue_used =
true;
1317 size_type use_queue_and_wt = n / 2048;
1321 std::vector<unsigned char> cs(wt_bwt.sigma);
1322 std::vector<size_type> rank_c_i(wt_bwt.sigma);
1323 std::vector<size_type> rank_c_j(wt_bwt.sigma);
1326 bool new_lcp_value =
false;
1327 uint8_t int_width =
bits::hi(n) + 2;
1332 size_type idx_out_buf = 0;
1336 std::vector<size_type> C;
1343 lcp_positions_buf[idx_out_buf++] = 0;
1346 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1347 new_lcp_value =
false;
1349 index_done[0] =
true;
1353 for (size_type i = 0; i < quantity; ++i)
1355 unsigned char c = cs[i];
1356 size_type a_new = C[c] + rank_c_i[i];
1357 size_type b_new = C[c] + rank_c_j[i];
1360 if (!index_done[b_new])
1365 lcp_positions_buf[idx_out_buf++] = b_new;
1367 index_done[b_new] =
true;
1376 new_lcp_value =
true;
1381 if (intervals < use_queue_and_wt && !queue_used)
1384 util::clear(dict[target]);
1389 while (b2 < dict[source].
size())
1391 q.push((a2 - 1) >> 1);
1397 util::clear(dict[source]);
1400 if (intervals >= use_queue_and_wt && queue_used)
1403 dict[source].resize(2 * (n + 1));
1408 dict[source][(q.front() << 1) + 1] = 1;
1410 dict[source][(q.front() << 1)] = 1;
1413 dict[target].resize(2 * (n + 1));
1418 if (intervals < use_queue_and_wt)
1425 size_type a = q.
front();
1427 size_type b = q.
front();
1432 for (size_type i = 0; i < quantity; ++i)
1434 unsigned char c = cs[i];
1435 size_type a_new = C[c] + rank_c_i[i];
1436 size_type b_new = C[c] + rank_c_j[i];
1439 if (!index_done[b_new])
1442 lcp_positions_buf[idx_out_buf++] = b_new;
1446 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1447 new_lcp_value =
false;
1449 index_done[b_new] =
true;
1458 intervals = intervals_new;
1468 while (b2 < dict[source].
size())
1470 interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j);
1471 for (size_type i = 0; i < quantity; ++i)
1473 unsigned char c = cs[i];
1474 size_type a_new = C[c] + rank_c_i[i];
1475 size_type b_new = C[c] + rank_c_j[i];
1477 if (!index_done[b_new])
1480 lcp_positions_buf[idx_out_buf++] = b_new;
1484 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1485 new_lcp_value =
false;
1487 index_done[b_new] =
true;
1489 dict[target][(a_new << 1) + 1] = 1;
1490 dict[target][(b_new << 1)] = 1;
1498 std::swap(source, target);
1502 new_lcp_value =
true;
1505 lcp_positions_buf.
close();
1513 uint8_t int_width =
bits::hi(lcp_value + 1) + 1;
1518 size_type number_of_values = ((n / ((int_width - 1ULL) / 8 + 1) + 16) & (~(0x7ULL)));
1522 number_of_values * int_width / 8,
1524 number_of_values = lcp_array.
buffersize() * 8 / int_width;
1526 for (size_type position_begin = 0, position_end = number_of_values; position_begin < n and number_of_values > 0;
1527 position_begin = position_end, position_end += number_of_values)
1529#ifdef STUDY_INFORMATIONS
1530 std::cout <<
"# number_of_values=" << number_of_values <<
" fill lcp_values with " << position_begin
1531 <<
" <= position <" << position_end <<
", each lcp-value has " << (int)int_width
1532 <<
" bit, lcp_value_max=" << lcp_value <<
" n=" << n << std::endl;
1535 for (size_type i = 0; i < n; ++i)
1537 size_type position = lcp_positions[i];
1543 if (position_begin <= position and position < position_end)
1545 lcp_array[position] = lcp_value;
1552 lcp_positions.
close(
true);
bits.hpp contains the sdsl::bits class.
uint64_t size() const
Returns the number of elements currently stored.
void close(bool remove_file=false)
Close the int_vector_buffer.
uint64_t buffersize() const
Returns the buffersize in bytes.
uint8_t width() const
Returns the width of the integers which are accessed via the [] operator.
void shrink_to_fit()
Free unused allocated memory.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
size_type size() const noexcept
The number of elements in the int_vector.
reference front() noexcept
Returns first element.
void resize(const size_type size)
Resize the int_vector in terms of elements.
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_TEXT[]
Get the smallest position f$i geq idx f$ where a bit is set t_int_vec::size_type next_bit(t_int_vec const &v, uint64_t idx)
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Namespace for the succinct data structure library.
bool store_to_cache(T const &v, std::string const &key, cache_config &config, bool add_type_hash=false)
Stores the object v as a resource in the cache.
void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
void construct_lcp_kasai(cache_config &config)
Construct the LCP array for text over byte- or integer-alphabet.
void construct_lcp_bwt_based(cache_config &config)
Construct the LCP array out of the BWT (only for byte strings)
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
void insert_lcp_values(int_vector<> &partial_lcp, bit_vector &index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset)
Merges a partial LCP array into the LCP array on disk.
void construct_lcp_PHI(cache_config &config)
Construct the LCP array for text over byte- or integer-alphabet.
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
T::size_type size_in_bytes(T const &t)
Get the size of a data structure in bytes.
void create_C_array(std::vector< uint64_t > &C, tWT const &wt)
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
void construct_lcp_semi_extern_PHI(cache_config &config)
Construct the LCP array (only for byte strings)
void construct_lcp_goPHI(cache_config &config)
Construct the LCP array (only for byte strings)
void construct_isa(cache_config &config)
void construct_lcp_bwt_based2(cache_config &config)
Construct the LCP array out of the BWT (only for byte strings)
void interval_symbols(t_wt const &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
int_vector ::size_type size(range_type const &r)
Size of a range.
void construct_lcp_go(cache_config &config)
Construct the LCP array (only for byte strings)
std::list< int_vector<>::size_type > tLI
void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
rank_support_v.hpp contains rank_support_v.
select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
static constexpr uint64_t lo_unset[65]
lo_unset[i] is a 64-bit word with the i least significant bits not set and the high bits set.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Helper class for construction process.
Helper classes to transform width=0 and width=8 to corresponding text key.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.