arg_router  1.4.0
C++ command line argument parsing and routing
code_point.hpp
1 // Copyright (C) 2022 by Camden Mannett.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE or copy at https://www.boost.org/LICENSE_1_0.txt)
4 
5 #pragma once
6 
7 #include <array>
8 #include <cstdint>
9 #include <optional>
10 #include <string_view>
11 
14 {
/** Integer type used to hold a single code point value throughout this header; an alias of
 * std::uint32_t.
 */
16 using type = std::uint32_t;
17 
21 class range
22 {
23 public:
32  constexpr range(type first, type last, std::uint8_t meta = 0) noexcept : data_{}
33  {
34  // array::fill is not constexpr until C++20...
35  for (auto& b : data_) {
36  b = 0;
37  }
38 
39  // Can't use reinterpret_cast in constexpr function, so we have to do it the old fashioned
40  // way!
41  // [0-20] Start code point
42  // [21-41] End code point
43  // [42-47] Metadata
44  // NOLINTBEGIN(readability-magic-numbers)
45  data_[0] = first & 0xFF;
46  data_[1] = (first >> 8) & 0xFF;
47  data_[2] = (first >> 16) & 0x1F;
48 
49  data_[2] |= (last & 0x7) << 5;
50  data_[3] = (last >> 3) & 0xFF;
51  data_[4] = (last >> 11) & 0xFF;
52  data_[5] = (last >> 19) & 0x3;
53 
54  data_[5] |= (meta & 0x3F) << 2;
55  // NOLINTEND(readability-magic-numbers)
56  }
57 
62  [[nodiscard]] constexpr type first() const noexcept
63  {
64  // NOLINTBEGIN(readability-magic-numbers)
65  auto value = type{data_[0]};
66  value |= data_[1] << 8;
67  value |= (data_[2] & 0x1F) << 16;
68  // NOLINTEND(readability-magic-numbers)
69 
70  return value;
71  }
72 
77  [[nodiscard]] constexpr type last() const noexcept
78  {
79  // NOLINTBEGIN(readability-magic-numbers)
80  type value = (data_[2] >> 5) & 0x7;
81  value |= data_[3] << 3;
82  value |= data_[4] << 11;
83  value |= (data_[5] & 0x3) << 19;
84  // NOLINTEND(readability-magic-numbers)
85 
86  return value;
87  }
88 
93  [[nodiscard]] constexpr std::uint8_t meta() const noexcept
94  {
95  // NOLINTNEXTLINE
96  return (data_[5] >> 2) & 0x3F;
97  }
98 
104  constexpr bool operator<(range other) const noexcept
105  {
106  if (first() == other.first()) {
107  return last() < other.last();
108  }
109  return first() < other.first();
110  }
111 
117  constexpr bool operator<(type cp) const noexcept { return first() < cp; }
118 
119 private:
120  constexpr static auto bytes_per_cp = std::size_t{6};
121  std::array<std::uint8_t, bytes_per_cp> data_;
122 };
123 
/** Counts the code points in @a str by counting every byte that is not a UTF-8 continuation byte.
 *
 * A continuation byte has its top two bits set to 10; every other byte (ASCII included) is taken
 * as the start of a code point.  No validation of the sequence is performed.
 *
 * @param str String to inspect
 * @return Number of bytes in @a str that are not continuation bytes
 */
[[nodiscard]] inline constexpr std::size_t count(std::string_view str) noexcept
{
    constexpr auto top_two_bits = std::uint8_t{0xC0};
    constexpr auto continuation_tag = std::uint8_t{0x80};

    auto total = std::size_t{0};
    for (auto i = std::size_t{0}; i < str.size(); ++i) {
        const auto is_continuation = (str[i] & top_two_bits) == continuation_tag;
        if (!is_continuation) {
            ++total;
        }
    }

    return total;
}
142 
/** Returns the number of bytes the leading code point of @a str should occupy, judged solely from
 * its first byte.
 *
 * The remaining length of @a str is not checked, so the result can be larger than str.size() when
 * the input is truncated.  The first byte is not validated either: a continuation byte
 * (0x80-0xBF) in the leading position yields 2, and anything above the 3-byte header range
 * yields 4.
 *
 * @param str String whose first code point is measured
 * @return 0 if @a str is empty, otherwise a value in the range [1-4]
 */
[[nodiscard]] inline constexpr std::size_t size(std::string_view str) noexcept
{
    if (str.empty()) {
        return 0;
    }

    const auto first_byte = static_cast<std::uint8_t>(str[0]);

    // The MSB determines if it's ASCII or UTF-8
    constexpr auto is_ascii_mask = std::uint8_t{0b1000'0000};
    if ((first_byte & is_ascii_mask) == 0) {
        // ASCII
        return 1;
    }

    // The header limits are inclusive: 0xDF is the largest 2-byte header (e.g. U+07FF) and 0xEF
    // the largest 3-byte header (e.g. U+FEFF, U+FFFF), so the comparisons must accept equality
    constexpr auto max_2_byte_header = std::uint8_t{0b1101'1111};
    if (first_byte <= max_2_byte_header) {
        return 2;
    }

    constexpr auto max_3_byte_header = std::uint8_t{0b1110'1111};
    if (first_byte <= max_3_byte_header) {
        return 3;
    }

    return 4;
}
176 
184 [[nodiscard]] inline constexpr std::optional<type> decode(std::string_view str) noexcept
185 {
186  const auto bytes_to_read = size(str);
187  if (bytes_to_read == 0) {
188  return {};
189  }
190 
191  if (bytes_to_read == 1) {
192  // ASCII
193  return str[0];
194  }
195 
196  if (str.size() < bytes_to_read) {
197  return {};
198  }
199 
200  constexpr auto subsequent_byte_data_bits = 6;
201  constexpr auto subsequent_byte_mask = type{(1 << subsequent_byte_data_bits) - 1};
202  constexpr auto maximum_first_byte_data_mask = type{0b0001'1111};
203 
204  const auto first_byte = static_cast<std::uint8_t>(str[0]);
205 
206  auto result = first_byte & (maximum_first_byte_data_mask >> (bytes_to_read - 2));
207  for (auto i = 1u; i < bytes_to_read; ++i) {
208  const auto subsequent_byte = static_cast<type>(str[i]);
209 
210  // Move the previous reads up to make space for the subsequent byte's data
211  result <<= subsequent_byte_data_bits;
212  result |= subsequent_byte & subsequent_byte_mask;
213  }
214 
215  return result;
216 }
217 
220 class iterator
221 {
222 public:
224  using difference_type = std::string_view::difference_type;
226  using value_type = std::string_view;
228  using pointer = const value_type*;
230  using reference = const value_type&;
232  using iterator_category = std::forward_iterator_tag;
233 
242  class range_t
243  {
244  public:
245  friend class iterator;
246 
251  [[nodiscard]] constexpr iterator begin() noexcept { return iterator{str_}; }
252 
257  [[nodiscard]] constexpr static iterator end() noexcept { return iterator{}; }
258 
259  private:
260  constexpr explicit range_t(std::string_view str) noexcept : str_{str} {}
261 
262  std::string_view str_;
263  };
264 
270  [[nodiscard]] constexpr static range_t range(std::string_view str) noexcept
271  {
272  return range_t{str};
273  }
274 
279  constexpr iterator() noexcept = default;
280 
286  constexpr explicit iterator(std::string_view str) noexcept : str_{str} {}
287 
293  [[nodiscard]] constexpr bool operator==(iterator other) const noexcept
294  {
295  // If they are both empty, then they are considered both end iterators and therefore equal
296  if (str_.empty() && other.str_.empty()) {
297  return true;
298  }
299  return (str_.data() == other.str_.data()) && (str_.size() == other.str_.size());
300  }
301 
307  [[nodiscard]] constexpr bool operator!=(iterator other) const noexcept
308  {
309  return !(*this == other);
310  }
311 
316  [[nodiscard]] constexpr value_type operator*() const noexcept
317  {
318  const auto num_bytes = code_point::size(str_);
319  return str_.substr(0, num_bytes);
320  }
321 
326  constexpr iterator& operator++() noexcept
327  {
328  const auto num_bytes = code_point::size(str_);
329  str_.remove_prefix(num_bytes);
330  return *this;
331  }
332 
337  constexpr iterator operator++(int) noexcept
338  {
339  auto result = *this;
340 
341  ++(*this);
342  return result;
343  }
344 
345 private:
346  std::string_view str_;
347 };
348 } // namespace arg_router::utility::utf8::code_point
constexpr iterator operator++(int) noexcept
Definition: code_point.hpp:337
constexpr bool operator!=(iterator other) const noexcept
Definition: code_point.hpp:307
constexpr static range_t range(std::string_view str) noexcept
Definition: code_point.hpp:270
std::string_view::difference_type difference_type
Definition: code_point.hpp:224
constexpr iterator & operator++() noexcept
Definition: code_point.hpp:326
constexpr value_type operator*() const noexcept
Definition: code_point.hpp:316
constexpr bool operator==(iterator other) const noexcept
Definition: code_point.hpp:293
constexpr type first() const noexcept
Definition: code_point.hpp:62
constexpr range(type first, type last, std::uint8_t meta=0) noexcept
Definition: code_point.hpp:32
constexpr std::uint8_t meta() const noexcept
Definition: code_point.hpp:93
constexpr bool operator<(type cp) const noexcept
Definition: code_point.hpp:117
constexpr type last() const noexcept
Definition: code_point.hpp:77
constexpr bool operator<(range other) const noexcept
Definition: code_point.hpp:104
constexpr std::optional< type > decode(std::string_view str) noexcept
Definition: code_point.hpp:184
constexpr std::size_t count(std::string_view str) noexcept
Definition: code_point.hpp:129
constexpr std::size_t size(std::string_view str) noexcept
Definition: code_point.hpp:148