6#include <rapidfuzz/details/CharSet.hpp>
7#include <rapidfuzz/details/PatternMatchVector.hpp>
8#include <rapidfuzz/details/common.hpp>
9#include <rapidfuzz/distance/Indel.hpp>
13namespace rapidfuzz::fuzz {
// fuzz::ratio: calculates a simple ratio between two strings.
// Iterator overload: each input is passed as a (first, last) iterator pair.
// score_cutoff defaults to 0; how it is applied is decided by the
// implementation, which is not part of this declaration.
45template <
typename InputIt1,
typename InputIt2>
46double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// Sentence overload: takes both inputs by const reference.
48template <
typename Sentence1,
typename Sentence2>
49double ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
52namespace experimental {
// Members of MultiRatio (the struct header and braces are not visible in this
// excerpt). All work is forwarded to a MultiIndel<MaxLen> scorer.
// Constructor: remembers `count` in input_count and passes it to the scorer.
56 MultiRatio(
size_t count) : input_count(count), scorer(count)
// Returns whatever the underlying scorer reports as its result count.
59 size_t result_count()
const
61 return scorer.result_count();
// insert (sentence form): unpacks the sentence into an iterator pair and
// forwards to the iterator overload below.
64 template <
typename Sentence1>
65 void insert(
const Sentence1& s1_)
67 insert(detail::to_begin(s1_), detail::to_end(s1_));
// insert (iterator form): hands the range straight to the scorer.
70 template <
typename InputIt1>
71 void insert(InputIt1 first1, InputIt1 last1)
73 scorer.insert(first1, last1);
// similarity (iterator form): wraps the pair in detail::Range and delegates
// to the sentence overload below.
76 template <
typename InputIt2>
77 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
78 double score_cutoff = 0.0)
const
80 similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
// similarity (sentence form): writes results into `scores`.
// The cutoff is divided by 100 before being given to
// scorer.normalized_similarity, i.e. callers pass it on a scale 100x larger
// than the scorer's own.
83 template <
typename Sentence2>
84 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
86 scorer.normalized_similarity(scores, score_count, s2, score_cutoff / 100.0);
// Loop over the first input_count entries; the loop body is missing from
// this excerpt. NOTE(review): presumably rescales scores[i] back to the
// 0-100 range -- confirm against the full header.
88 for (
size_t i = 0; i < input_count; ++i)
// Underlying multi-string Indel scorer that every member forwards to.
94 rapidfuzz::experimental::MultiIndel<MaxLen> scorer;
// CachedRatio<CharT1> (the `struct` line itself is missing from this excerpt).
// Holds a CachedIndel<CharT1> built once from the first sequence; both
// similarity overloads are only declared here and defined elsewhere.
100template <
typename CharT1>
// Construct from an iterator pair; the range is forwarded to cached_indel.
102 template <
typename InputIt1>
103 CachedRatio(InputIt1 first1, InputIt1 last1) : cached_indel(first1, last1)
// Construct from a whole sentence; forwarded to cached_indel unchanged.
// NOTE(review): this single-argument constructor is not `explicit`, unlike
// most other Cached* sentence constructors in this file -- confirm intended.
106 template <
typename Sentence1>
107 CachedRatio(
const Sentence1& s1) : cached_indel(s1)
// Similarity against a second sequence given as an iterator pair.
// score_cutoff and score_hint both default to 0.0.
110 template <
typename InputIt2>
111 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
112 double score_hint = 0.0)
const;
// Similarity against a second sequence given as a sentence.
114 template <
typename Sentence2>
115 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Cached Indel state for the first sequence.
118 CachedIndel<CharT1> cached_indel;
// Deduction guides for CachedRatio: CharT1 is taken from
// char_type<Sentence1> when constructing from a sentence ...
121template <
typename Sentence1>
122CachedRatio(
const Sentence1& s1) -> CachedRatio<char_type<Sentence1>>;
// ... and from iter_value_t<InputIt1> when constructing from an iterator pair.
124template <
typename InputIt1>
125CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio<iter_value_t<InputIt1>>;
// partial_ratio_alignment: returns a ScoreAlignment<double> rather than a
// bare double. NOTE(review): presumably the partial_ratio score together with
// the position of the matched alignment -- confirm against ScoreAlignment.
// Iterator overload; score_cutoff defaults to 0.
127template <
typename InputIt1,
typename InputIt2>
128ScoreAlignment<double> partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2,
129 InputIt2 last2,
double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
131template <
typename Sentence1,
typename Sentence2>
132ScoreAlignment<double> partial_ratio_alignment(
const Sentence1& s1,
const Sentence2& s2,
133 double score_cutoff = 0);
// fuzz::partial_ratio: calculates the fuzz::ratio of the optimal string
// alignment. Iterator overload; score_cutoff defaults to 0.
160template <
typename InputIt1,
typename InputIt2>
161double partial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
162 double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
164template <
typename Sentence1,
typename Sentence2>
165double partial_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
// CachedPartialRatio<CharT1>: keeps a copy of the first sequence (s1), a
// CharSet built for it (s1_char_set) and a CachedRatio (cached_ratio).
// Braces and access specifiers are missing from this excerpt.
168template <
typename CharT1>
169struct CachedPartialRatio {
// CachedWRatio is granted access to this struct's non-public members.
171 friend struct CachedWRatio;
// Iterator-pair constructor: declared here, defined elsewhere.
173 template <
typename InputIt1>
174 CachedPartialRatio(InputIt1 first1, InputIt1 last1);
// Sentence constructor: delegates to the iterator-pair constructor.
176 template <
typename Sentence1>
177 explicit CachedPartialRatio(
const Sentence1& s1_)
178 : CachedPartialRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence given as an iterator pair.
// score_cutoff and score_hint both default to 0.0.
181 template <
typename InputIt2>
182 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
183 double score_hint = 0.0)
const;
// Similarity against a second sequence given as a sentence.
185 template <
typename Sentence2>
186 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
189 std::basic_string<CharT1> s1;
// Character set associated with s1 (populated by the out-of-line constructor).
190 rapidfuzz::detail::CharSet<CharT1> s1_char_set;
// Cached plain-ratio scorer.
191 CachedRatio<CharT1> cached_ratio;
// Deduction guides for CachedPartialRatio.
// Sentence form (explicit, matching the explicit constructor): CharT1 is
// char_type<Sentence1>.
194template <
typename Sentence1>
195explicit CachedPartialRatio(
const Sentence1& s1) -> CachedPartialRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
197template <
typename InputIt1>
198CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio<iter_value_t<InputIt1>>;
// Iterator overload of a scoring function whose name/parameter line is
// missing from this excerpt; only the template header and the trailing
// `score_cutoff = 0` default survive.
// NOTE(review): presumably token_sort_ratio, given the overload below -- confirm.
226template <
typename InputIt1,
typename InputIt2>
228 double score_cutoff = 0);
// fuzz::token_sort_ratio (sentence overload): sorts the words in the strings
// and calculates the fuzz::ratio between them. score_cutoff defaults to 0.
230template <
typename Sentence1,
typename Sentence2>
231double token_sort_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
234namespace experimental {
// MultiTokenSortRatio: wraps a MultiRatio<MaxLen> scorer and feeds it the
// sorted_split(...).join() form of every input. The template header for
// MaxLen and all braces are missing from this excerpt.
236struct MultiTokenSortRatio {
// Constructor: forwards `count` to the wrapped scorer.
238 MultiTokenSortRatio(
size_t count) : scorer(count)
// Returns the wrapped scorer's result count.
241 size_t result_count()
const
243 return scorer.result_count();
// insert (sentence form): forwards to the iterator overload below.
246 template <
typename Sentence1>
247 void insert(
const Sentence1& s1_)
249 insert(detail::to_begin(s1_), detail::to_end(s1_));
// insert (iterator form): splits and sorts the range, joins it back
// together, and inserts the joined result into the scorer.
252 template <
typename InputIt1>
253 void insert(InputIt1 first1, InputIt1 last1)
255 scorer.insert(detail::sorted_split(first1, last1).join());
// similarity (iterator form): applies the same sorted_split + join to the
// second sequence before scoring, so both sides are normalised identically.
258 template <
typename InputIt2>
259 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
260 double score_cutoff = 0.0)
const
262 scorer.similarity(scores, score_count, detail::sorted_split(first2, last2).join(), score_cutoff);
// similarity (sentence form): forwards to the iterator overload above.
265 template <
typename Sentence2>
266 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
268 similarity(scores, score_count, detail::to_begin(s2), detail::to_end(s2), score_cutoff);
// Wrapped multi-string ratio scorer.
272 MultiRatio<MaxLen> scorer;
// CachedTokenSortRatio<CharT1>: stores the sorted/joined form of the first
// sequence and a CachedRatio built from it. Braces are missing from this excerpt.
279template <
typename CharT1>
280struct CachedTokenSortRatio {
// Iterator-pair constructor. s1_sorted is declared before cached_ratio, so it
// is initialised first and can safely be passed to cached_ratio's constructor.
281 template <
typename InputIt1>
282 CachedTokenSortRatio(InputIt1 first1, InputIt1 last1)
283 : s1_sorted(detail::sorted_split(first1, last1).join()), cached_ratio(s1_sorted)
// Sentence constructor: delegates to the iterator-pair constructor.
286 template <
typename Sentence1>
287 explicit CachedTokenSortRatio(
const Sentence1& s1)
288 : CachedTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
291 template <
typename InputIt2>
292 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
293 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
295 template <
typename Sentence2>
296 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Result of sorted_split(...).join() on the first sequence.
299 std::basic_string<CharT1> s1_sorted;
// Cached ratio scorer constructed from s1_sorted.
300 CachedRatio<CharT1> cached_ratio;
// Deduction guides for CachedTokenSortRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
303template <
typename Sentence1>
304explicit CachedTokenSortRatio(
const Sentence1& s1) -> CachedTokenSortRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
306template <
typename InputIt1>
307CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio<iter_value_t<InputIt1>>;
// Fragment of an iterator-overload declaration: the line carrying the
// function name and iterator parameters is missing from this excerpt.
// NOTE(review): presumably partial_token_sort_ratio (it precedes
// CachedPartialTokenSortRatio) -- confirm against the full header.
329template <
typename InputIt1,
typename InputIt2>
331 double score_cutoff = 0);
// Template header of the matching sentence overload; the declaration it
// introduces is missing from this excerpt.
333template <
typename Sentence1,
typename Sentence2>
// CachedPartialTokenSortRatio<CharT1>: stores the sorted/joined form of the
// first sequence and a CachedPartialRatio built from it. Braces are missing
// from this excerpt.
337template <
typename CharT1>
338struct CachedPartialTokenSortRatio {
// Iterator-pair constructor. s1_sorted is declared before
// cached_partial_ratio, so it is initialised first and can be passed on.
339 template <
typename InputIt1>
340 CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
341 : s1_sorted(detail::sorted_split(first1, last1).join()), cached_partial_ratio(s1_sorted)
// Sentence constructor: delegates to the iterator-pair constructor.
344 template <
typename Sentence1>
345 explicit CachedPartialTokenSortRatio(
const Sentence1& s1)
346 : CachedPartialTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
349 template <
typename InputIt2>
350 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
351 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
353 template <
typename Sentence2>
354 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Result of sorted_split(...).join() on the first sequence.
357 std::basic_string<CharT1> s1_sorted;
// Cached partial-ratio scorer constructed from s1_sorted.
358 CachedPartialRatio<CharT1> cached_partial_ratio;
// Deduction guides for CachedPartialTokenSortRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
361template <
typename Sentence1>
362explicit CachedPartialTokenSortRatio(
const Sentence1& s1)
363 -> CachedPartialTokenSortRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
365template <
typename InputIt1>
366CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
367 -> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
// fuzz::token_set_ratio: compares the words in the strings based on unique
// and common words between them using fuzz::ratio.
// Iterator overload; score_cutoff defaults to 0.
397template <
typename InputIt1,
typename InputIt2>
398double token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
399 double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
401template <
typename Sentence1,
typename Sentence2>
402double token_set_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
// CachedTokenSetRatio<CharT1>: owns a copy of the first sequence plus a
// sorted token view computed over that copy. Braces are missing from this excerpt.
405template <
typename CharT1>
406struct CachedTokenSetRatio {
// Iterator-pair constructor: copies the range into s1, then tokenises the
// member copy (not the caller's range). s1 is declared before tokens_s1, so
// it is fully constructed when sorted_split reads it.
// NOTE(review): tokens_s1 is built from iterators into this object's own s1;
// if SplittedSentenceView retains them, a defaulted copy/move would leave the
// view pointing at the source object's string -- confirm.
407 template <
typename InputIt1>
408 CachedTokenSetRatio(InputIt1 first1, InputIt1 last1)
409 : s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
// Sentence constructor: delegates to the iterator-pair constructor.
412 template <
typename Sentence1>
413 explicit CachedTokenSetRatio(
const Sentence1& s1_)
414 : CachedTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
417 template <
typename InputIt2>
418 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
419 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
421 template <
typename Sentence2>
422 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
425 std::basic_string<CharT1> s1;
// Sorted token view over s1 (parameterised on s1's iterator type).
426 detail::SplittedSentenceView<typename std::basic_string<CharT1>::iterator> tokens_s1;
// Deduction guides for CachedTokenSetRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
429template <
typename Sentence1>
430explicit CachedTokenSetRatio(
const Sentence1& s1) -> CachedTokenSetRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
432template <
typename InputIt1>
433CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio<iter_value_t<InputIt1>>;
// Fragment of an iterator-overload declaration: the line carrying the
// function name and iterator parameters is missing from this excerpt.
// NOTE(review): presumably partial_token_set_ratio (it precedes
// CachedPartialTokenSetRatio) -- confirm against the full header.
454template <
typename InputIt1,
typename InputIt2>
456 double score_cutoff = 0);
// Template header of the matching sentence overload; the declaration it
// introduces is missing from this excerpt.
458template <
typename Sentence1,
typename Sentence2>
// CachedPartialTokenSetRatio<CharT1>: owns a copy of the first sequence plus
// a sorted token view computed over that copy. Braces are missing from this excerpt.
462template <
typename CharT1>
463struct CachedPartialTokenSetRatio {
// Iterator-pair constructor: copies the range into s1, then tokenises the
// member copy. s1 is declared before tokens_s1, so it is initialised first.
// NOTE(review): tokens_s1 is built from iterators into this object's own s1;
// if the view retains them, a defaulted copy/move would refer to the source
// object's string -- confirm.
464 template <
typename InputIt1>
465 CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
466 : s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
// Sentence constructor: delegates to the iterator-pair constructor.
469 template <
typename Sentence1>
470 explicit CachedPartialTokenSetRatio(
const Sentence1& s1_)
471 : CachedPartialTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
474 template <
typename InputIt2>
475 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
476 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
478 template <
typename Sentence2>
479 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
482 std::basic_string<CharT1> s1;
// Sorted token view over s1 (parameterised on s1's iterator type).
483 detail::SplittedSentenceView<typename std::basic_string<CharT1>::iterator> tokens_s1;
// Deduction guides for CachedPartialTokenSetRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
486template <
typename Sentence1>
487explicit CachedPartialTokenSetRatio(
const Sentence1& s1) -> CachedPartialTokenSetRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
489template <
typename InputIt1>
490CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
491 -> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
// fuzz::token_ratio: helper that returns the maximum of fuzz::token_set_ratio
// and fuzz::token_sort_ratio. Iterator overload; score_cutoff defaults to 0.
512template <
typename InputIt1,
typename InputIt2>
513double token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
515template <
typename Sentence1,
typename Sentence2>
516double token_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
// CachedTokenRatio<CharT1>: keeps the first sequence, its sorted tokens, the
// joined sorted string, and a CachedRatio over that string. Braces and the
// first line of the constructor's initialiser list are missing from this excerpt.
519template <
typename CharT1>
520struct CachedTokenRatio {
// Iterator-pair constructor. Members are initialised in declaration order
// (s1, s1_tokens, s1_sorted, cached_ratio_s1_sorted), and each initialiser
// shown below reads only members declared before it.
521 template <
typename InputIt1>
522 CachedTokenRatio(InputIt1 first1, InputIt1 last1)
524 s1_tokens(detail::sorted_split(std::begin(s1), std::end(s1))),
525 s1_sorted(s1_tokens.join()),
526 cached_ratio_s1_sorted(s1_sorted)
// Sentence constructor: delegates to the iterator-pair constructor.
529 template <
typename Sentence1>
530 explicit CachedTokenRatio(
const Sentence1& s1_)
531 : CachedTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
534 template <
typename InputIt2>
535 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
536 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
538 template <
typename Sentence2>
539 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
542 std::basic_string<CharT1> s1;
// Sorted token view over s1.
543 detail::SplittedSentenceView<typename std::basic_string<CharT1>::iterator> s1_tokens;
// s1_tokens joined back into a single string.
544 std::basic_string<CharT1> s1_sorted;
// Cached ratio scorer built from s1_sorted.
545 CachedRatio<CharT1> cached_ratio_s1_sorted;
// Deduction guides for CachedTokenRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
548template <
typename Sentence1>
549explicit CachedTokenRatio(
const Sentence1& s1) -> CachedTokenRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
551template <
typename InputIt1>
552CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio<iter_value_t<InputIt1>>;
// Iterator overload of a scoring function whose name/parameter line is
// missing from this excerpt; only the template header and the trailing
// `score_cutoff = 0` default survive.
// NOTE(review): presumably partial_token_ratio, given the overload below -- confirm.
574template <
typename InputIt1,
typename InputIt2>
576 double score_cutoff = 0);
// fuzz::partial_token_ratio (sentence overload): helper that returns the
// maximum of fuzz::partial_token_set_ratio and a second partial token ratio
// whose name is truncated in the generated summary.
// NOTE(review): presumably fuzz::partial_token_sort_ratio -- confirm.
578template <
typename Sentence1,
typename Sentence2>
579double partial_token_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
// CachedPartialTokenRatio<CharT1>: keeps the first sequence, its sorted
// tokens and the joined sorted string. Braces and the first line of the
// constructor's initialiser list are missing from this excerpt.
582template <
typename CharT1>
583struct CachedPartialTokenRatio {
// Iterator-pair constructor. Members are initialised in declaration order
// (s1, tokens_s1, s1_sorted); each initialiser shown reads only members
// declared before it.
584 template <
typename InputIt1>
585 CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1)
587 tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))),
588 s1_sorted(tokens_s1.join())
// Sentence constructor: delegates to the iterator-pair constructor.
591 template <
typename Sentence1>
592 explicit CachedPartialTokenRatio(
const Sentence1& s1_)
593 : CachedPartialTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
596 template <
typename InputIt2>
597 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
598 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
600 template <
typename Sentence2>
601 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
604 std::basic_string<CharT1> s1;
// Sorted token view over s1.
605 detail::SplittedSentenceView<typename std::basic_string<CharT1>::iterator> tokens_s1;
// tokens_s1 joined back into a single string.
606 std::basic_string<CharT1> s1_sorted;
// Deduction guides for CachedPartialTokenRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
609template <
typename Sentence1>
610explicit CachedPartialTokenRatio(
const Sentence1& s1) -> CachedPartialTokenRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
612template <
typename InputIt1>
613CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio<iter_value_t<InputIt1>>;
// fuzz::WRatio: calculates a weighted ratio based on the other ratio
// algorithms. Iterator overload; score_cutoff defaults to 0.
636template <
typename InputIt1,
typename InputIt2>
637double WRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
639template <
typename Sentence1,
typename Sentence2>
640double WRatio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
// CachedWRatio<CharT1> (the `struct` line itself is missing from this
// excerpt). Keeps the first sequence, a CachedPartialRatio, its sorted
// tokens, the joined sorted string and a BlockPatternMatchVector.
643template <
typename CharT1>
// Iterator-pair constructor: declared here, defined elsewhere.
// NOTE(review): `explicit` sits on this two-argument constructor while the
// single-argument sentence constructor below is implicit -- the reverse of
// the other Cached* structs in this file; confirm this is intended.
645 template <
typename InputIt1>
646 explicit CachedWRatio(InputIt1 first1, InputIt1 last1);
// Sentence constructor: delegates to the iterator-pair constructor.
648 template <
typename Sentence1>
649 CachedWRatio(
const Sentence1& s1_) : CachedWRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
652 template <
typename InputIt2>
653 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
654 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
656 template <
typename Sentence2>
657 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
662 std::basic_string<CharT1> s1;
// Cached partial-ratio scorer.
663 CachedPartialRatio<CharT1> cached_partial_ratio;
// Sorted token view (parameterised on the iterator type of basic_string<CharT1>).
664 detail::SplittedSentenceView<typename std::basic_string<CharT1>::iterator> tokens_s1;
// Sorted form of the first sequence (filled in by the out-of-line constructor).
665 std::basic_string<CharT1> s1_sorted;
// Pattern-match bit vectors; by its name, built for s1_sorted -- the
// constructor that populates it is not shown here.
666 rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted;
// Deduction guides for CachedWRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
669template <
typename Sentence1>
670explicit CachedWRatio(
const Sentence1& s1) -> CachedWRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
672template <
typename InputIt1>
673CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio<iter_value_t<InputIt1>>;
// fuzz::QRatio: calculates a quick ratio between two strings using
// fuzz::ratio. Iterator overload; score_cutoff defaults to 0.
696template <
typename InputIt1,
typename InputIt2>
697double QRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// Sentence overload taking both inputs by const reference.
699template <
typename Sentence1,
typename Sentence2>
700double QRatio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
703namespace experimental {
// Members of MultiQRatio (the struct header and braces are not visible in
// this excerpt). Wraps a MultiRatio<MaxLen> scorer and additionally records
// the length of every inserted string so empty ones can be scored as 0.
// Constructor: forwards `count` to the wrapped scorer.
707 MultiQRatio(
size_t count) : scorer(count)
// Returns the wrapped scorer's result count.
710 size_t result_count()
const
712 return scorer.result_count();
// insert (sentence form): forwards to the iterator overload below.
715 template <
typename Sentence1>
716 void insert(
const Sentence1& s1_)
718 insert(detail::to_begin(s1_), detail::to_end(s1_));
// insert (iterator form): adds the range to the scorer and appends its
// length to str_lens, so entry i of str_lens is the length of the i-th
// inserted string.
721 template <
typename InputIt1>
722 void insert(InputIt1 first1, InputIt1 last1)
724 scorer.insert(first1, last1);
725 str_lens.push_back(
static_cast<size_t>(std::distance(first1, last1)));
// similarity (iterator form): wraps the pair in detail::Range and delegates
// to the sentence overload below.
728 template <
typename InputIt2>
729 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
730 double score_cutoff = 0.0)
const
732 similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
// similarity (sentence form): writes one score per inserted string into `scores`.
735 template <
typename Sentence2>
736 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
// Range view over the second sequence.
738 rapidfuzz::detail::Range s2_(s2);
// Loop over all inserted strings; its body and any enclosing condition are
// missing from this excerpt. NOTE(review): presumably the early-out that
// zeroes every score when s2 is empty -- confirm against the full header.
740 for (
size_t i = 0; i < str_lens.size(); ++i)
// Regular scoring through the wrapped MultiRatio.
746 scorer.similarity(scores, score_count, s2, score_cutoff);
// Override the score with 0 for every inserted string of length 0.
748 for (
size_t i = 0; i < str_lens.size(); ++i)
749 if (str_lens[i] == 0) scores[i] = 0;
// Length of each inserted string, in insertion order.
753 std::vector<size_t> str_lens;
// Wrapped multi-string ratio scorer.
754 MultiRatio<MaxLen> scorer;
// CachedQRatio<CharT1> (the `struct` line itself is missing from this
// excerpt). Keeps a copy of the first sequence alongside a CachedRatio built
// from the same range.
759template <
typename CharT1>
// Iterator-pair constructor: the range is used twice, once to copy into s1
// and once to build cached_ratio.
761 template <
typename InputIt1>
762 CachedQRatio(InputIt1 first1, InputIt1 last1) : s1(first1, last1), cached_ratio(first1, last1)
// Sentence constructor: delegates to the iterator-pair constructor.
765 template <
typename Sentence1>
766 explicit CachedQRatio(
const Sentence1& s1_) : CachedQRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Similarity against a second sequence (iterator pair); declared only.
// score_cutoff and score_hint both default to 0.0.
769 template <
typename InputIt2>
770 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
771 double score_hint = 0.0)
const;
// Similarity against a second sequence (sentence); declared only.
773 template <
typename Sentence2>
774 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Owned copy of the first sequence.
777 std::basic_string<CharT1> s1;
// Cached ratio scorer for the first sequence.
778 CachedRatio<CharT1> cached_ratio;
// Deduction guides for CachedQRatio.
// Sentence form (explicit): CharT1 is char_type<Sentence1>.
781template <
typename Sentence1>
782explicit CachedQRatio(
const Sentence1& s1) -> CachedQRatio<char_type<Sentence1>>;
// Iterator form: CharT1 is iter_value_t<InputIt1>.
784template <
typename InputIt1>
785CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio<iter_value_t<InputIt1>>;
791#include <rapidfuzz/fuzz.impl>
double token_sort_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Sorts the words in the strings and calculates the fuzz::ratio between them.
double partial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Calculates the fuzz::ratio of the optimal string alignment.
double token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Helper method that returns the maximum of fuzz::token_set_ratio and fuzz::token_sort_ratio (faster than manually executing the two functions).
double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Calculates a simple ratio between two strings.
double partial_token_sort_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Sorts the words in the strings and calculates the fuzz::partial_ratio between them.
double partial_token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Compares the words in the strings based on unique and common words between them using fuzz::partial_ratio.
double WRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Calculates a weighted ratio based on the other ratio algorithms.
double QRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Calculates a quick ratio between two strings using fuzz::ratio.
double partial_token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Helper method that returns the maximum of fuzz::partial_token_set_ratio and fuzz::partial_token_sort_ratio (faster than manually executing the two functions).
double token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff=0)
Compares the words in the strings based on unique and common words between them using fuzz::ratio.