[engine] move shell cmd split function to openfpga tokenizer

2023-01-02 12:38:16 -08:00 · 2023-01-02 12:38:16 -08:00 · 994402ec9a
parent 33c0c3a972
commit 994402ec9a
3 changed files with 59 additions and 21 deletions
--- a/libs/libopenfpgashell/src/shell.tpp
+++ b/libs/libopenfpgashell/src/shell.tpp
@ -487,13 +487,9 @@ template <class T>
 int Shell<T>::execute_command(const char* cmd_line,
                               T& common_context) {
  openfpga::StringToken tokenizer(cmd_line);  
+  tokenizer.add_delim(' ');
  /* Do not split the string in each quote "", as they should be a piece */
-  std::vector<size_t> quote_anchors;
-  size_t quote_found = tokenizer.data().find("\"");
-  while (std::string::npos != quote_found) {
-    quote_anchors.push_back(quote_found);
-    quote_found = tokenizer.data().find("\"", quote_found+1);
-  }
+  std::vector<size_t> quote_anchors = tokenizer.find_positions('\"');
  /* Quote should be not be started with! */
  if (!quote_anchors.empty() && quote_anchors.front() == 0) {
    VTR_LOG("Quotes (\") should NOT be the first charactor in command line: '%s'\n", cmd_line);
@ -509,21 +505,7 @@ int Shell<T>::execute_command(const char* cmd_line,
  if (quote_anchors.empty()) {
    tokens = tokenizer.split(" ");
  } else {
-    /* There are pairs of quotes, identify the chunk which should be split*/
-    std::vector<std::string> token_chunks = tokenizer.split("\"");
-    for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
-      /* Chunk with even index (including the first) is always out of two quote -> Split!
-       * Chunk with odd index is always between two quotes -> Do not split!
-       */
-      if (ichunk % 2 == 0) {
-        openfpga::StringToken chunk_tokenizer(token_chunks[ichunk]);  
-        for (std::string curr_token : chunk_tokenizer.split(" ")) {
-          tokens.push_back(curr_token);
-        }
-      } else {
-        tokens.push_back(token_chunks[ichunk]);
-      }
-    }
+    tokens = tokenizer.split_by_chunks('\"');
  } 

  /* Find if the command name is valid */
--- a/libs/libopenfpgautil/src/openfpga_tokenizer.cpp
+++ b/libs/libopenfpgautil/src/openfpga_tokenizer.cpp
@ -89,6 +89,40 @@ std::vector<std::string> StringToken::split() {
  return split(delims);
 }

+/* Gather the index of every occurrence of the given character in the stored
+ * string. The string is scanned from left to right, so the returned indices
+ * are in ascending order; the list is empty when the character never occurs */
+std::vector<size_t> StringToken::find_positions(const char& delim) const {
+  std::vector<size_t> positions;
+  /* Resume each search right after the previous hit until nothing is left */
+  for (size_t pos = data_.find(delim); pos != std::string::npos;
+       pos = data_.find(delim, pos + 1)) {
+    positions.push_back(pos);
+  }
+  return positions;
+}
+
+/* Cut the stored string at every chunk_delim, then alternately tokenize the
+ * resulting chunks or keep them as a single token.
+ *
+ * - split_odd_chunk == false: chunks with an even index (0, 2, ...) are
+ *   tokenized, chunks with an odd index are kept intact
+ * - split_odd_chunk == true: the roles are swapped
+ *
+ * Chunks are tokenized with the delimiters registered on this tokenizer
+ * (e.g., through add_delim()), so that the caller's configuration is honored.
+ * When no delimiter is registered, the default behavior of split() applies.
+ * NOTE(review): this assumes the registered delimiters are stored in the
+ * member delims_ - confirm against the class definition.
+ *
+ * Returns all the tokens in their order of appearance in the string */
+std::vector<std::string> StringToken::split_by_chunks(const char& chunk_delim, const bool& split_odd_chunk) const {
+  /* Parity of the chunk indices which are tokenized further */
+  const size_t chunk_idx_mod = split_odd_chunk ? 1 : 0;
+  std::vector<std::string> tokens;
+  const std::vector<std::string> token_chunks = split(chunk_delim);
+  for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
+    /* A chunk with the other parity is a single piece -> Do not split! */
+    if (ichunk % 2 != chunk_idx_mod) {
+      tokens.push_back(token_chunks[ichunk]);
+      continue;
+    }
+    /* A fresh tokenizer has no delimiter of its own: pass ours explicitly,
+     * otherwise the delimiters chosen by the caller would be silently ignored
+     */
+    StringToken chunk_tokenizer(token_chunks[ichunk]);
+    const std::vector<std::string> chunk_tokens =
+      delims_.empty() ? chunk_tokenizer.split() : chunk_tokenizer.split(delims_);
+    tokens.insert(tokens.end(), chunk_tokens.begin(), chunk_tokens.end());
+  }
+  return tokens;
+}
+
 /************************************************************************
 * Public Mutators
 ***********************************************************************/
--- a/libs/libopenfpgautil/src/openfpga_tokenizer.h
+++ b/libs/libopenfpgautil/src/openfpga_tokenizer.h
@ -27,6 +27,28 @@ class StringToken {
  std::vector<std::string> split(const char* delim) const;
  std::vector<std::string> split(const std::vector<char>& delim) const;
  std::vector<std::string> split();
+  /** @brief Find every position (0-based character index) at which a given delimiter occurs in the string, and return them as a list in ascending order
+   * For example, to find the positions of all the quotes (") in the string:
+   *   "we" are good
+   * The following code is suggested:
+   *   StringToken tokenizer("\"we\" are good");
+   *   std::vector<size_t> anchors = tokenizer.find_positions('\"');
+   * The following vector will be returned:
+   *   [0, 3]
+   * An empty vector is returned when the delimiter does not occur in the string */
+  std::vector<size_t> find_positions(const char& delim) const;
+  /** @brief Split the string chunk by chunk. This is useful when some chunks of the string should not be split into tokens
+   * The string is first cut at every chunk delimiter; the resulting chunks are then alternately tokenized or kept as a single token
+   * For example, to split a string which contains quotes ("):
+   *   source "cmdA --opt1 val1;cmdB --opt2 val2" --verbose
+   * where the substring between the two quotes should not be split
+   * The following code is suggested:
+   *   StringToken tokenizer("source \"cmdA --opt1 val1;cmdB --opt2 val2\" --verbose");
+   *   std::vector<std::string> tokens = tokenizer.split_by_chunks('\"');
+   * The following vector will be returned (assuming that white space is the token delimiter):
+   *   ["source" "cmdA --opt1 val1;cmdB --opt2 val2" "--verbose"]
+   *
+   * .. note:: By default, the chunks with an even index (0, 2, ...) are tokenized while the chunks with an odd index are kept intact. When ``split_odd_chunk`` is true, the roles are swapped: the odd chunks are tokenized and the even chunks are kept intact. This is useful when the chunk delimiter appears at the beginning of the string.
+   */
+  std::vector<std::string> split_by_chunks(const char& chunk_delim, const bool& split_odd_chunk = false) const;

 public: /* Public Mutators */
  void set_data(const std::string& data);