[engine] move shell cmd split function to openfpga tokenizer

This commit is contained in:
tangxifan 2023-01-02 12:38:16 -08:00
parent 33c0c3a972
commit 994402ec9a
3 changed files with 59 additions and 21 deletions

View File

@ -487,13 +487,9 @@ template <class T>
int Shell<T>::execute_command(const char* cmd_line,
T& common_context) {
openfpga::StringToken tokenizer(cmd_line);
tokenizer.add_delim(' ');
/* Do not split the string in each quote "", as they should be a piece */
std::vector<size_t> quote_anchors;
size_t quote_found = tokenizer.data().find("\"");
while (std::string::npos != quote_found) {
quote_anchors.push_back(quote_found);
quote_found = tokenizer.data().find("\"", quote_found+1);
}
std::vector<size_t> quote_anchors = tokenizer.find_positions('\"');
/* A quote must not be the first character of the command line */
if (!quote_anchors.empty() && quote_anchors.front() == 0) {
VTR_LOG("Quotes (\") should NOT be the first charactor in command line: '%s'\n", cmd_line);
@ -509,21 +505,7 @@ int Shell<T>::execute_command(const char* cmd_line,
if (quote_anchors.empty()) {
tokens = tokenizer.split(" ");
} else {
/* There are pairs of quotes, identify the chunk which should be split*/
std::vector<std::string> token_chunks = tokenizer.split("\"");
for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
/* Chunk with even index (including the first) is always out of two quote -> Split!
* Chunk with odd index is always between two quotes -> Do not split!
*/
if (ichunk % 2 == 0) {
openfpga::StringToken chunk_tokenizer(token_chunks[ichunk]);
for (std::string curr_token : chunk_tokenizer.split(" ")) {
tokens.push_back(curr_token);
}
} else {
tokens.push_back(token_chunks[ichunk]);
}
}
tokens = tokenizer.split_by_chunks('\"');
}
/* Find if the command name is valid */

View File

@ -89,6 +89,40 @@ std::vector<std::string> StringToken::split() {
return split(delims);
}
/* Collect the index of every occurrence of a character in the stored string.
 * The search walks forward from the start, so indices are returned in
 * ascending order; the result is empty when the character never occurs.
 */
std::vector<size_t> StringToken::find_positions(const char& delim) const {
  std::vector<size_t> positions;
  /* Resume each search right after the previous hit until nothing is left */
  for (size_t pos = data_.find(delim); pos != std::string::npos;
       pos = data_.find(delim, pos + 1)) {
    positions.push_back(pos);
  }
  return positions;
}
std::vector<std::string> StringToken::split_by_chunks(const char& chunk_delim, const bool& split_odd_chunk) const {
size_t chunk_idx_mod = 0;
if (split_odd_chunk) {
chunk_idx_mod = 1;
}
std::vector<std::string> tokens;
/* There are pairs of quotes, identify the chunk which should be split*/
std::vector<std::string> token_chunks = split(chunk_delim);
for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
/* Chunk with even index (including the first) is always out of two quote -> Split!
* Chunk with odd index is always between two quotes -> Do not split!
*/
if (ichunk % 2 == chunk_idx_mod) {
StringToken chunk_tokenizer(token_chunks[ichunk]);
for (std::string curr_token : chunk_tokenizer.split()) {
tokens.push_back(curr_token);
}
} else {
tokens.push_back(token_chunks[ichunk]);
}
}
return tokens;
}
/************************************************************************
* Public Mutators
***********************************************************************/

View File

@ -27,6 +27,28 @@ class StringToken {
std::vector<std::string> split(const char* delim) const;
std::vector<std::string> split(const std::vector<char>& delim) const;
std::vector<std::string> split();
/** @brief Find every position (0-based character index) at which a given delimiter occurs in the string. The positions are returned as a list in ascending order; the list is empty when the delimiter does not occur
 * For example, to find the position of all quotes (") in a string:
 * "we" are good
 * The following code is suggested:
 * StringToken tokenizer("\"we\" are good");
 * std::vector<size_t> anchors = tokenizer.find_positions('\"');
 * The following vector will be returned:
 * [0, 3] */
std::vector<size_t> find_positions(const char& delim) const;
/** @brief Split the string chunk by chunk. This is useful when some chunks of the string (e.g., quoted substrings) should not be split by the regular delimiter
 * The string is first cut into chunks at each ``chunk_delim``; chunks are indexed from 0.
 * - By default (``split_odd_chunk`` = false), even-indexed chunks are split further and odd-indexed chunks are kept as a single token
 * - When ``split_odd_chunk`` = true, odd-indexed chunks are split further and even-indexed chunks are kept as a single token
 * For example, to split the string with quotes (") in a string:
 * source "cmdA --opt1 val1;cmdB --opt2 val2" --verbose
 * where the string between the two quotes should not be split
 * The following code is suggested:
 * StringToken tokenizer("source \"cmdA --opt1 val1;cmdB --opt2 val2\" --verbose");
 * std::vector<std::string> tokens = tokenizer.split_by_chunks('\"');
 * The following vector will be returned:
 * ["source" "cmdA --opt1 val1;cmdB --opt2 val2" "--verbose"]
 *
 * .. note:: The option ``split_odd_chunk`` is useful when the chunk delimiter appears at the beginning of the string (presumably because the protected chunk then comes first, i.e. has an even index; confirm against split()).
 */
std::vector<std::string> split_by_chunks(const char& chunk_delim, const bool& split_odd_chunk = false) const;
public: /* Public Mutators */
void set_data(const std::string& data);