LCOV - code coverage report
Current view: top level - src/debugger - script_parser.cpp Hit Total Coverage
Test: Malbolge Unit Test Code Coverage Lines: 210 218 96.3 %
Date: 2021-02-03 17:18:54
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Cam Mannett 2020
       2             :  *
       3             :  * See LICENSE file
       4             :  */
       5             : 
       6             : #include "malbolge/debugger/script_parser.hpp"
       7             : #include "malbolge/utility/from_chars.hpp"
       8             : #include "malbolge/utility/string_view_ops.hpp"
       9             : #include "malbolge/utility/unescaper.hpp"
      10             : #include "malbolge/log.hpp"
      11             : 
      12             : #include <boost/algorithm/clamp.hpp>
      13             : 
      14             : #include <fstream>
      15             : #include <vector>
      16             : 
      17             : using namespace malbolge;
      18             : using namespace debugger;
      19             : using namespace utility::string_view_ops;
      20             : using namespace std::string_literals;
      21             : 
      22             : namespace
      23             : {
      24             : using function_types = traits::arg_extractor<script::functions::function_variant>;  // Type list walked with utility::tuple_type_iterator below
      25             : 
                       // A single name=value argument as it appears in a command.  Both views
                       // are non-owning, and both indices are offsets into the trimmed command
                       // string.
                       // NOTE(review): the constructor only sets the name fields; value starts
                       // empty and value_index is left uninitialised until extract_fn_args()
                       // assigns them.
      26             : struct argument_string
      27             : {
      28          70 :     explicit argument_string(std::string_view n, std::size_t index) :
      29             :         name{n},
      30          70 :         name_index{index}
      31          70 :     {}
      32             : 
      33             :     std::string_view name;
      34             :     std::size_t name_index;  // Index into trimmed command
      35             :     std::string_view value;
      36             :     std::size_t value_index;  // Index into trimmed command
      37             : };
      38             : 
                       // Holds one command with whitespace and comments stripped out, together
                       // with a table that maps indices in the stripped text back to locations
                       // in the original (untrimmed) command text.  trim() must be called
                       // before any of the accessors are used.
      39             : class trimmed_command
      40             : {
      41             : public:
                           // Returns a non-owning view of the trimmed text.  The view refers to
                           // s_, so it is only meaningful until the next call to trim().
      42             :     [[nodiscard]]
      43         236 :     std::string_view string() const
      44             :     {
      45         236 :         return s_;
      46             :     }
      47             : 
                           // Translates an index into the trimmed text into a location in the
                           // untrimmed command.  An index beyond the last recorded mapping
                           // resolves to the location stored in that last mapping.
      48             :     [[nodiscard]]
      49          26 :     source_location map(std::size_t cmd_index) const
      50             :     {
      51          26 :         if (cmd_index > m_.back().index) {
      52           5 :             return m_.back().untrimmed;
      53             :         }
      54             : 
      55             :         auto it = std::upper_bound(m_.begin(),
      56             :                                    m_.end(),
      57             :                                    cmd_index,
      58          59 :                                    [](auto i, auto m) { return i < m.index; });
      59             : 
      60             :         // Upper bound returns the element greater than cmd_index, so we need to
      61             :         // move down one.  There will always be at least one element in m_
      62             :         // because we start the parsing with an UNKNOWN content type
      63          21 :         --it;
      64             : 
                               // Offset the column by the distance from the start of the mapped run
      65          21 :         auto src_loc = it->untrimmed;
      66          21 :         src_loc.column += cmd_index - it->index;
      67          21 :         return src_loc;
      68             :     }
      69             : 
                           // Returns the untrimmed location stored in the last mapping, which
                           // trim() records after it has consumed the whole command.
      70             :     [[nodiscard]]
      71          58 :     source_location end_source_location() const
      72             :     {
      73             :         // There will always be at least one element in m_ because we start
      74             :         // the parsing with an UNKNOWN content type
      75          58 :         return m_.back().untrimmed;
      76             :     }
      77             : 
                           // Rebuilds s_ and m_ from the raw command text fn_cmd.  Spaces, tabs,
                           // newlines and // comments are dropped unless they are inside a
                           // double-quoted string.  A mapping is recorded at every newline, at the
                           // first character of each run of command characters, and once more at
                           // the end.  Throws parse_exception if nothing is left after trimming.
      78          85 :     void trim(std::string_view fn_cmd)
      79             :     {
      80          85 :         s_.clear();
      81          85 :         m_.clear();
      82             : 
      83             :         // Because clear() doesn't release the capacity, these are only called
      84             :         // once
      85          85 :         m_.reserve(10);
      86          85 :         s_.reserve(fn_cmd.size());
      87             : 
      88             :         // It is very important that the start indices are zero, rather than the
      89             :         // default of 1 - as this instance will be summed with the script-level
      90             :         // instance
      91          85 :         auto src_loc = source_location{0, 0};
      92          85 :         auto type = UNKNOWN;
      93             : 
      94        2050 :         auto is_comment = [](auto it, auto end) {
      95             :             // Is this and the next character a forward slash
      96        2050 :             return (*it == '/') && (it != --end) && (*(++it) == '/');
      97             :         };
      98             : 
                               // Records that the current end of s_ corresponds to src_loc.  If the
                               // last mapping already has this index its location is overwritten
                               // rather than a duplicate entry being added.
      99        1036 :         auto insert = [this](auto src_loc) {
     100         280 :             const auto index = s_.size();
     101         280 :             if (!m_.empty() && m_.back().index == index) {
     102          78 :                 m_.back().untrimmed = src_loc;
     103             :             } else {
     104         202 :                 m_.emplace_back(index, src_loc);
     105             :             }
     106         280 :         };
     107             : 
     108        2328 :         for (auto it = fn_cmd.begin(); it != fn_cmd.end(); ++it) {
     109        2243 :             if (*it == ' ' || *it == '\t') {
     110          98 :                 if (type == STRING) {
     111           0 :                     s_.push_back(*it);
     112          98 :                 } else if (type != COMMENT) {
     113          71 :                     type = WHITESPACE;
     114             :                 }
     115          98 :                 ++src_loc.column;
     116        2145 :             } else if (*it == '\n') {
     117          80 :                 insert(src_loc);
     118             : 
                                       // A newline ends a comment but not a string
     119          80 :                 if (type == STRING) {
     120           0 :                     s_.push_back(*it);
     121          80 :                 } else if (type == COMMENT) {
     122          13 :                     type = UNKNOWN;
     123             :                 }
     124             : 
     125          80 :                 ++src_loc.line;
     126          80 :                 src_loc.column = 0;
     127        2065 :             } else if (*it == '\"') {
     128          15 :                 if (type == STRING) {
     129             :                     // We need to check if this was preceded by an escape character
                                           // NOTE(review): only the single previous character is examined,
                                           // so a quote that follows an escaped backslash is also treated
                                           // as escaped - confirm this is intended
     130           8 :                     if (*(it-1) != '\\') {
     131           6 :                         type = UNKNOWN;
     132             :                     }
     133           8 :                     s_.push_back(*it);
     134           7 :                 } else if (type != COMMENT) {
     135           7 :                     type = STRING;
     136           7 :                     s_.push_back(*it);
     137             :                 }
     138          15 :                 ++src_loc.column;
     139        2050 :             } else if (is_comment(it, fn_cmd.end())) {
     140          16 :                 if (type == STRING) {
     141           0 :                     s_.push_back(*it);
     142          16 :                 } else if (type != COMMENT) {
     143          16 :                     type = COMMENT;
     144             :                 }
     145          16 :                 ++src_loc.column;
     146             :             } else {
     147        2034 :                 if (type == STRING) {
     148          33 :                     s_.push_back(*it);
     149        2001 :                 } else if (type != COMMENT) {
                                           // First character of a new run of command text, so note
                                           // where it came from in the untrimmed input
     150        1809 :                     if (type != COMMAND) {
     151         116 :                         type = COMMAND;
     152         116 :                         insert(src_loc);
     153             :                     }
     154        1809 :                     s_.push_back(*it);
     155             :                 }
     156        2034 :                 ++src_loc.column;
     157             :             }
     158             :         }
     159             : 
     160          85 :         if (s_.empty()) [[unlikely]] {
     161           1 :             throw parse_exception{"Empty command", src_loc};
     162             :         }
     163             : 
     164             :         // Add an end point if there isn't already one
     165          84 :         insert(src_loc);
     166          84 :     }
     167             : 
     168             : private:
                           // Kind of content trim() is currently scanning
     169             :     enum content_type
     170             :     {
     171             :         UNKNOWN,
     172             :         COMMAND,
     173             :         WHITESPACE,
     174             :         STRING,
     175             :         COMMENT
     176             :     };
     177             : 
                           // Pairs an index into s_ with the location of the corresponding
                           // character in the untrimmed command
     178             :     struct mapping
     179             :     {
     180         202 :         explicit mapping(std::size_t i, source_location u) :
     181             :             index{i},
     182         202 :             untrimmed{u}
     183         202 :         {}
     184             : 
     185             :         std::size_t index;
     186             :         source_location untrimmed;
     187             :     };
     188             : 
     189             :     std::vector<mapping> m_;  // Appended to by trim(), searched by index in map()
     190             :     std::string s_;  // Trimmed command text
     191             : };
     192             : 
                       // Folds a command-level location into the running script-level location.
                       // A command location with a non-zero line advances the script line by that
                       // amount and restarts the script column at the command column plus one;
                       // otherwise only the script column is advanced.  An empty cmd_src_loc
                       // leaves script_src_loc untouched.
     193          85 : void update_source_location(source_location& script_src_loc,
     194             :                             optional_source_location cmd_src_loc)
     195             : {
     196          85 :     if (!cmd_src_loc) {
     197           0 :         return;
     198             :     }
     199             : 
     200          85 :     if (cmd_src_loc->line) {
     201          62 :         script_src_loc.line += cmd_src_loc->line;
                           // Command-level columns are counted from zero in trimmed_command::trim(),
                           // whereas parse() starts the script-level location at {1, 1}
     202          62 :         script_src_loc.column = cmd_src_loc->column+1;
     203             :     } else {
     204          23 :         script_src_loc.column += cmd_src_loc->column;
     205             :     }
     206             : }
     207             : 
                       // Returns true when every character of str satisfies std::isspace, which
                       // includes the case where str is empty.
                       // NOTE(review): each character is handed to std::isspace as a plain char;
                       // the C classification functions require a value representable as
                       // unsigned char (or EOF), so a cast may be wanted here - confirm.
     208             : [[nodiscard]]
     209          88 : bool only_whitespace(std::string_view str) noexcept
     210             : {
     211         247 :     return std::all_of(str.begin(), str.end(), [](auto c) { return std::isspace(c); });
     212             : }
     213             : 
                       // Compares fn_name against the name() of every type in function_types and
                       // throws parse_exception if none of them match.  The reported location is
                       // trimmed-command index fn_name.size()-1 mapped back to the untrimmed text.
     214          77 : void check_fn_name(std::string_view fn_name, const trimmed_command& trimmed)
     215             : {
     216          77 :     auto result = false;
     217         693 :     utility::tuple_type_iterator<function_types>([&](auto, auto ptr) {
     218             :         using Fn = std::remove_pointer_t<decltype(ptr)>;
     219             : 
     220         616 :         if (Fn::name() == fn_name) {
     221          75 :             result = true;
     222             :         }
     223         616 :     });
     224             : 
     225          77 :     if (!result) [[unlikely]] {
     226           6 :         throw parse_exception{"Unrecognised function name: "s + fn_name,
     227           8 :                               trimmed.map(fn_name.size()-1)};
     228             :     }
     229          75 : }
     230             : 
                       // Returns the part of the trimmed command that precedes the first open
                       // bracket, after validating it with check_fn_name().  The returned view
                       // refers to the text held by trimmed.  Throws parse_exception when there
                       // is no open bracket, when the bracket is the very first character, or
                       // when the name is not recognised.
     231             : [[nodiscard]]
     232          84 : std::string_view extract_fn_name(const trimmed_command& trimmed)
     233             : {
     234             :     // Read up to the first bracket
     235          84 :     const auto first_bracket_index = trimmed.string().find_first_of('(');
     236          84 :     if (first_bracket_index == std::string_view::npos) [[unlikely]] {
                           // npos is greater than any recorded mapping index, so map() resolves
                           // it to the location stored in the last mapping
     237           5 :         throw parse_exception{"No open bracket in function",
     238          10 :                               trimmed.map(first_bracket_index)};
     239          79 :     } else if (first_bracket_index == 0) [[unlikely]] {
     240           2 :         throw parse_exception{"No function name",
     241           4 :                               trimmed.map(first_bracket_index)};
     242             :     }
     243             : 
     244          77 :     auto fn_name = trimmed.string().substr(0, first_bracket_index);
     245          77 :     check_fn_name(fn_name, trimmed);
     246             : 
     247          75 :     return fn_name;
     248             : }
     249             : 
                       // Splits the text between the brackets of the trimmed command into
                       // name/value pairs.
                       //
                       // open_bracket_offset is the index of the open bracket in the trimmed
                       // command (parse() passes the length of the function name).  The views in
                       // the returned entries refer to the text held by trimmed.
                       //
                       // Throws parse_exception when the command does not end with a close
                       // bracket, when an = has no name before it, when a , or ) is reached
                       // while no argument is waiting for a value, or when a string is left
                       // unterminated.
     250             : [[nodiscard]]
     251          75 : std::vector<argument_string> extract_fn_args(std::size_t open_bracket_offset,
     252             :                                              const trimmed_command& trimmed)
     253             : {
     254             :     // Pull just the arg string out of the command string
     255          75 :     const auto cmd = trimmed.string();
     256          75 :     if (cmd.back() != ')') [[unlikely]] {
     257           4 :         throw parse_exception{"No close bracket in function",
     258           8 :                               trimmed.map(cmd.size())};
     259             :     }
     260             : 
     261          71 :     ++open_bracket_offset;  // Skip over the bracket to where the args start
     262             : 
     263             :     // Exit early if there is nothing to parse
     264          71 :     auto result = std::vector<argument_string>{};
     265          71 :     if ((cmd.size() - 1 - open_bracket_offset) == 0) {
     266           7 :         return result;
     267             :     }
     268             : 
     269             :     // Iterate through each character, and mark important characters:
     270             :     // = is the argument/value assignment
     271             :     // , is the argument/value divider
     272             :     // \ is the escape character
     273             :     // " is the string start/end character
     274             :     //
     275             :     // This task is complicated by the fact that any of these characters can
     276             :     // appear inside a string value (including the double quotes if preceded by
     277             :     // the escape character), but must be ignored
     278          64 :     auto inside_string = false;
     279          64 :     auto escaped = false;
                       // word is a view over the token currently being accumulated.  It grows by
                       // one character at the bottom of the loop and is restarted just past the
                       // delimiter whenever a name or value has been completed.
     280          64 :     auto word = std::string_view{cmd.data() + open_bracket_offset, 0};
     281        1050 :     for (auto it  = (cmd.begin() + open_bracket_offset); it != cmd.end(); ++it) {
     282         992 :         switch (*it) {
     283          72 :         case '=':
     284             :         {
     285          72 :             if (inside_string) {
     286           0 :                 break;
     287             :             }
     288          72 :             if (word.empty()) [[unlikely]] {
     289           2 :                 throw parse_exception{"Missing argument name",
     290           4 :                                       trimmed.map(std::distance(cmd.begin(), it))};
     291             :             }
     292             : 
     293          70 :             escaped = false;
     294          70 :             result.emplace_back(word, std::distance(cmd.begin(), it - word.size() - 1));
     295          70 :             word = std::string_view{word.data() + word.size() + 1, 0};
     296          70 :             continue;
     297             :         }
     298          76 :         case ',':
     299             :         case ')':
     300             :         {
     301          76 :             if (inside_string) {
     302           1 :                 break;
     303             :             }
                               // Either no name has been seen yet, or the latest argument has
                               // already been given a value
     304          75 :             if (result.empty() || !result.back().value.empty()) [[unlikely]] {
     305           4 :                 throw parse_exception{"Missing argument value",
     306           8 :                                       trimmed.map(std::distance(cmd.begin(), it))};
     307             :             }
     308          71 :             escaped = false;
     309             : 
     310          71 :             auto& arg = result.back();
     311          71 :             arg.value = word;
     312          71 :             arg.value_index = std::distance(cmd.begin(), it - word.size() - 1);
     313          71 :             word = std::string_view{word.data() + word.size() + 1, 0};
     314          71 :             continue;
     315             :         }
     316           2 :         case '\\':
     317             :         {
     318           2 :             escaped = true;
     319           2 :             break;
     320             :         }
     321          15 :         case '"':
     322             :         {
     323          15 :             if (escaped) {
     324           2 :                 escaped = false;
     325           2 :                 break;
     326             :             }
     327          13 :             inside_string = !inside_string;
     328          13 :             break;
     329             :         }
     330         827 :         default:
     331         827 :             escaped = false;
     332         897 :             break;
     333             :         }
     334             : 
     335         845 :         word = std::string_view{word.data(), word.size() + 1};
     336             :     }
     337             : 
     338          58 :     if (inside_string) [[unlikely]] {
     339           1 :         throw parse_exception{"Unterminated string", trimmed.map(cmd.size())};
     340             :     }
     341             : 
     342          57 :     return result;
     343             : }
     344             : 
                       // Converts the textual value in arg_str into the value type of Arg.
                       //
                       // uint and ternary values are parsed with utility::from_chars, reg values
                       // must be exactly A, C or D, and string values have their first and last
                       // characters removed before being passed to utility::unescape_ascii.
                       //
                       // A parse_exception is rethrown unchanged; any other std::exception is
                       // converted into a parse_exception carrying the same message and the
                       // mapped location of arg_str.value_index.
                       // NOTE(review): the string branch removes one character from each end
                       // without checking the length, so it presumably relies on the value being
                       // at least two characters (the surrounding quotes) - confirm.
     345             : template <typename Arg>
     346             : [[nodiscard]]
     347          66 : auto create_arg_value(const argument_string& arg_str,
     348             :                       const trimmed_command& trimmed)
     349             : {
     350             :     static_assert(std::tuple_size_v<script::type::all> == 4,
     351             :                   "Number of script types changed, update this function");
     352             : 
     353             :     try {
     354             :         if constexpr (std::is_same_v<typename Arg::value_type, script::type::uint>) {
     355          20 :             return utility::from_chars<typename Arg::value_type>(arg_str.value);
     356             :         } else if constexpr (std::is_same_v<typename Arg::value_type, script::type::ternary>) {
     357          35 :             return utility::from_chars<typename Arg::value_type>(arg_str.value);
     358             :         } else if constexpr (std::is_same_v<typename Arg::value_type, script::type::reg>) {
     359           7 :             if (arg_str.value == "A") {
     360           2 :                 return script::type::reg::A;
     361           5 :             } else if (arg_str.value == "C") {
     362           2 :                 return script::type::reg::C;
     363           3 :             } else if (arg_str.value == "D") {
     364           2 :                 return script::type::reg::D;
     365             :             } else {
     366           3 :                 throw parse_exception{"Unrecognised vCPU register ID: "s + arg_str.value,
     367           2 :                                       trimmed.map(arg_str.value_index)};
     368             :             }
     369             :         } else if constexpr (std::is_same_v<typename Arg::value_type, script::type::string>) {
     370             :             // Strip off the leading and trailing double quotes
     371           4 :             auto string_arg = arg_str.value;
     372           4 :             string_arg.remove_prefix(1);
     373           4 :             string_arg.remove_suffix(1);
     374           8 :             return utility::unescape_ascii(string_arg);
     375             :         } else {
     376             :             static_assert(traits::always_false_v<Arg>, "Unhandled argument type");
     377             :         }
     378           6 :     } catch (parse_exception& e) {
                           // Already carries a location, so pass it on untouched
     379           1 :         throw;
     380           8 :     } catch (std::exception& e) {
     381           4 :         throw parse_exception{e.what(), trimmed.map(arg_str.value_index)};
     382             :     }
     383             : }
     384             : 
                       // Builds the function object named fn_name from the parsed arguments.
                       //
                       // The type in function_types whose name() equals fn_name is located, a
                       // default-constructed args_type tuple is filled in from fn_args by matching
                       // argument names, and the function is then constructed from that tuple.
                       // Arguments that are not supplied keep their default-constructed state.
                       //
                       // Throws parse_exception for an argument name the function does not have,
                       // for a value that create_arg_value() rejects, or if no function type
                       // matched fn_name.
     385             : [[nodiscard]]
     386             : script::functions::function_variant
     387          64 : create_fn(std::string_view fn_name,
     388             :           const std::vector<argument_string>& fn_args,
     389             :           const trimmed_command& trimmed)
     390             : {
     391             :     // Find the matching function type, then iterate over the args to call the
     392             :     // function type's constructor
     393         128 :     auto result = std::optional<script::functions::function_variant>{};
     394         544 :     utility::tuple_type_iterator<function_types>([&](auto, auto fn_ptr) {
     395             :         using Fn = std::remove_pointer_t<decltype(fn_ptr)>;
     396             : 
     397         480 :         if (Fn::name() == fn_name) {
     398          60 :             auto tuple_args = typename Fn::args_type{};
     399         125 :             for (auto&& arg : fn_args) {
     400          67 :                 auto found = false;
     401         370 :                 utility::tuple_type_iterator<typename Fn::args_type>([&](auto j, auto arg_ptr) {
     402             :                     using Arg = std::remove_pointer_t<decltype(arg_ptr)>;
     403             : 
     404          91 :                     if (Arg::name() == arg.name) {
     405         133 :                         std::get<j>(tuple_args).value = create_arg_value<Arg>(arg, trimmed);
     406          61 :                         found = true;
     407             :                     }
     408             :                 });
     409             : 
     410          62 :                 if (!found) [[unlikely]] {
     411           3 :                     throw parse_exception{"Unrecognised argument name: "s + arg.name,
     412           2 :                                           trimmed.map(arg.name_index)};
     413             :                 }
     414             :             }
     415             : 
     416          58 :             result = std::make_from_tuple<Fn>(std::move(tuple_args));
     417             :         }
     418         474 :     });
     419             : 
     420          58 :     if (!result) [[unlikely]] {
     421             :         // Should never get here as the function names have already been checked
     422           0 :         throw parse_exception{"DEV_ERROR: Unrecognised function name: "s + fn_name};
     423             :     }
     424             : 
     425         116 :     return *result;
     426             : }
     427             : }
     428             : 
                       // Parses a script from stream into a sequence of functions.
                       //
                       // The stream is read in chunks delimited by ; and each chunk that is not
                       // purely whitespace is trimmed, split into a function name and arguments,
                       // and appended to the result as a function object.
                       //
                       // Every failure is reported as a parse_exception whose location has been
                       // combined with the script-level location tracked in src_loc.
     429          30 : script::functions::sequence script::parse(std::istream& stream)
     430             : {
     431          30 :     auto fn_seq = script::functions::sequence{};
     432          30 :     auto src_loc = source_location{1, 1};   // Script level source location
     433             : 
     434             :     try {
     435             :         // Read in a command's worth of data.  Then strip out the whitespace
     436             :         // (not if it is inside string data) and any comments - however this
     437             :         // needs to be done in such a way that the source location data is
     438             :         // preserved so that any errors can be mapped back to the original
     439             :         // script
     440          60 :         auto trimmed = trimmed_command{};
     441         118 :         for (auto fn_cmd = ""s; std::getline(stream, fn_cmd, ';'); ) {
     442             :             // This also returns true if fn_cmd is empty
                                   // NOTE(review): skipped chunks do not update src_loc, so any
                                   // newlines they contain are not counted - confirm intended
     443          88 :             if (only_whitespace(fn_cmd)) {
     444           3 :                 continue;
     445             :             }
     446             : 
     447          85 :             trimmed.trim(fn_cmd);
     448             : 
     449             :             // Extract the function name and check it is one of the known
     450             :             // functions
     451          84 :             const auto fn_name = extract_fn_name(trimmed);
                                   // The name starts at index zero, so its length is also the index
                                   // of the open bracket
     452          81 :             const auto fn_args = extract_fn_args(fn_name.size(), trimmed);
     453          64 :             fn_seq.push_back(create_fn(fn_name, fn_args, trimmed));
     454             : 
     455             :             // Update script-level source location
     456          58 :             update_source_location(src_loc, trimmed.end_source_location());
     457             :         }
     458          54 :     } catch (parse_exception& e) {
     459             :         // Append the command-level source location with the script-level one
     460          27 :         update_source_location(src_loc, e.location());
     461          27 :         throw parse_exception{e.message(), src_loc};
     462           0 :     } catch (std::exception& e) {
     463             :         // If an unexpected error occurs then src_loc may be inaccurate, but it
     464             :         // at least gives a location minimum and the command end as a maximum
     465           0 :         throw parse_exception{e.what(), src_loc};
     466             :     }
     467             : 
     468           6 :     return fn_seq;
     469             : }
     470             : 
                       // Opens the file at path and parses it with the stream overload of parse().
                       // Throws parse_exception (without a source location) if the stream is not
                       // in a good state after being opened.
     471           2 : script::functions::sequence script::parse(const std::filesystem::path& path)
     472             : {
     473           4 :     auto stream = std::ifstream{path};
     474           2 :     if (!stream) {
     475           1 :         throw parse_exception{"Unable to read "s + path.string()};
     476             :     }
     477             : 
     478           2 :     return parse(stream);
     479             : }

Generated by: LCOV version 1.14