arg_router  1.4.0
C++ command line argument parsing and routing
levenshtein_distance.hpp
1 // Copyright (C) 2023 by Camden Mannett.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE or copy at https://www.boost.org/LICENSE_1_0.txt)
4 
5 #pragma once
6 
7 #include "arg_router/parsing/token_type.hpp"
8 #include "arg_router/utility/tree_recursor.hpp"
9 
10 #include <numeric>
11 
13 {
22 [[nodiscard]] inline std::size_t levenshtein_distance(std::string_view a, std::string_view b)
23 {
24  if (a.size() == 0) {
25  return count(b);
26  } else if (b.size() == 0) {
27  return count(a);
28  }
29 
30  const auto n = count(b);
31 
32  auto costs = vector<std::size_t>(n + 1);
33  std::iota(costs.begin(), costs.end(), 0);
34 
35  auto i = std::size_t{0};
36  for (auto c1 : iterator::range(a)) {
37  costs[0] = i + 1;
38  auto corner = i;
39 
40  auto j = std::size_t{0};
41  for (auto c2 : iterator::range(b)) {
42  const auto upper = costs[j + 1];
43  if (c1 == c2) {
44  costs[j + 1] = corner;
45  } else {
46  auto t = std::min(upper, corner);
47  costs[j + 1] = std::min(costs[j], t) + 1;
48  }
49 
50  corner = upper;
51  ++j;
52  }
53  ++i;
54  }
55 
56  return costs[n];
57 }
58 
69 template <typename Node>
71  parsing::token_type token)
72 {
73  static_assert(std::tuple_size_v<typename Node::children_type> > 0,
74  "Node must have at least one child");
75 
76  auto best_token = vector<parsing::token_type>{};
77  auto best_score = std::numeric_limits<std::size_t>::max();
78 
79  // The token may not have been processed yet, so do a type conversion to be sure
80  if (token.prefix == parsing::prefix_type::none) {
81  token = parsing::get_token_type(token.name);
82  }
83 
85  [&](const auto& child, const auto&... parents) {
86  // Skip if child is the starting node
87  if constexpr (sizeof...(parents) > 0) {
88  using child_type = std::decay_t<decltype(child)>;
89  using parents_type_without_root =
90  boost::mp11::mp_pop_back<std::tuple<std::decay_t<decltype(parents)>...>>;
91 
92  // This monstrosity is because we want to keep the vector initialisation from a
93  // fold-expression, whilst removing the root entry which requires converting the
94  // pack to a tuple first
95  [[maybe_unused]] const auto append_parents = [](parsing::token_type child_token) {
96  return std::apply(
97  [&](auto... parents_without_root) {
99  child_token,
100  parsing::node_token_type<std::decay_t<
101  std::remove_pointer_t<decltype(parents_without_root)>>>()...};
102  },
103  boost::mp11::mp_transform<std::add_pointer_t, parents_type_without_root>{});
104  };
105 
106  // Skip runtime disabled nodes
107  if (parsing::is_runtime_disabled(child, parents...)) {
108  return;
109  }
110 
111  // We have to check if the tested token is not the same as the input token, because
112  // sometimes the input token is valid e.g. a value separator is required but one
113  // wasn't on the command line. We only test names for this as the unknown args are
114  // always none-named
115  if constexpr (traits::has_long_name_method_v<child_type>) {
116  const auto score =
117  utility::utf8::levenshtein_distance(token.name, child.long_name());
118  if (score < best_score) {
119  best_token =
120  append_parents({parsing::prefix_type::long_, child.long_name()});
121  best_score = score;
122  }
123  }
124  if constexpr (traits::has_short_name_method_v<child_type>) {
125  const auto score =
126  utility::utf8::levenshtein_distance(token.name, child.short_name());
127  if (score < best_score) {
128  best_token =
129  append_parents({parsing::prefix_type::short_, child.short_name()});
130  best_score = score;
131  }
132  }
133  if constexpr (traits::has_none_name_method_v<child_type>) {
134  const auto score =
135  utility::utf8::levenshtein_distance(token.name, child.none_name());
136  if (score < best_score) {
137  best_token =
138  append_parents({parsing::prefix_type::none, child.none_name()});
139  best_score = score;
140  }
141  }
142  }
143  },
144  node);
145 
146  return best_token;
147 }
148 } // namespace arg_router::utility::utf8
constexpr static range_t range(std::string_view str) noexcept
Definition: utf8.hpp:98
bool is_runtime_disabled(const Node &node, const Parents &... parents) noexcept
Definition: parsing.hpp:133
token_type get_token_type(std::string_view token)
Definition: token_type.hpp:103
constexpr token_type node_token_type() noexcept
Definition: parsing.hpp:68
std::size_t levenshtein_distance(std::string_view a, std::string_view b)
vector< parsing::token_type > closest_matching_child_node(const Node &node, parsing::token_type token)
constexpr std::size_t count(std::string_view str) noexcept
Definition: utf8.hpp:278
constexpr void tree_recursor(Visitor visitor, const Root &root)
std::vector< T, config::allocator< T > > vector
Definition: basic_types.hpp:39
std::string_view name
Token name, stripped of prefix (if any)
Definition: token_type.hpp:65
prefix_type prefix
Prefix type.
Definition: token_type.hpp:64