Merge pull request #971 from lnis-uofu/shell_source

Cleanup: Move command string parsing to OpenFPGA Tokenizer
This commit is contained in:
tangxifan 2023-01-02 16:03:35 -08:00 committed by GitHub
commit 7b8214c4cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 21 deletions

View File

@ -487,13 +487,9 @@ template <class T>
int Shell<T>::execute_command(const char* cmd_line, int Shell<T>::execute_command(const char* cmd_line,
T& common_context) { T& common_context) {
openfpga::StringToken tokenizer(cmd_line); openfpga::StringToken tokenizer(cmd_line);
tokenizer.add_delim(' ');
/* Do not split the string in each quote "", as they should be a piece */ /* Do not split the string in each quote "", as they should be a piece */
std::vector<size_t> quote_anchors; std::vector<size_t> quote_anchors = tokenizer.find_positions('\"');
size_t quote_found = tokenizer.data().find("\"");
while (std::string::npos != quote_found) {
quote_anchors.push_back(quote_found);
quote_found = tokenizer.data().find("\"", quote_found+1);
}
/* Quote should be not be started with! */ /* Quote should be not be started with! */
if (!quote_anchors.empty() && quote_anchors.front() == 0) { if (!quote_anchors.empty() && quote_anchors.front() == 0) {
VTR_LOG("Quotes (\") should NOT be the first charactor in command line: '%s'\n", cmd_line); VTR_LOG("Quotes (\") should NOT be the first charactor in command line: '%s'\n", cmd_line);
@ -509,21 +505,7 @@ int Shell<T>::execute_command(const char* cmd_line,
if (quote_anchors.empty()) { if (quote_anchors.empty()) {
tokens = tokenizer.split(" "); tokens = tokenizer.split(" ");
} else { } else {
/* There are pairs of quotes, identify the chunk which should be split*/ tokens = tokenizer.split_by_chunks('\"');
std::vector<std::string> token_chunks = tokenizer.split("\"");
for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
/* Chunk with even index (including the first) is always out of two quote -> Split!
* Chunk with odd index is always between two quotes -> Do not split!
*/
if (ichunk % 2 == 0) {
openfpga::StringToken chunk_tokenizer(token_chunks[ichunk]);
for (std::string curr_token : chunk_tokenizer.split(" ")) {
tokens.push_back(curr_token);
}
} else {
tokens.push_back(token_chunks[ichunk]);
}
}
} }
/* Find if the command name is valid */ /* Find if the command name is valid */

View File

@ -89,6 +89,41 @@ std::vector<std::string> StringToken::split() {
return split(delims); return split(delims);
} }
/* Scan the stored string from left to right and record the index of every
 * character equal to the given delimiter.
 * The indices are returned in ascending order; the vector is empty when the
 * delimiter does not occur at all.
 */
std::vector<size_t> StringToken::find_positions(const char& delim) const {
  std::vector<size_t> positions;
  /* Each search resumes one character after the previous hit */
  for (size_t pos = data_.find(delim); pos != std::string::npos;
       pos = data_.find(delim, pos + 1)) {
    positions.push_back(pos);
  }
  return positions;
}
std::vector<std::string> StringToken::split_by_chunks(
const char& chunk_delim, const bool& split_odd_chunk) const {
size_t chunk_idx_mod = 0;
if (split_odd_chunk) {
chunk_idx_mod = 1;
}
std::vector<std::string> tokens;
/* There are pairs of quotes, identify the chunk which should be split*/
std::vector<std::string> token_chunks = split(chunk_delim);
for (size_t ichunk = 0; ichunk < token_chunks.size(); ichunk++) {
/* Chunk with even index (including the first) is always out of two quote ->
* Split! Chunk with odd index is always between two quotes -> Do not split!
*/
if (ichunk % 2 == chunk_idx_mod) {
StringToken chunk_tokenizer(token_chunks[ichunk]);
for (std::string curr_token : chunk_tokenizer.split()) {
tokens.push_back(curr_token);
}
} else {
tokens.push_back(token_chunks[ichunk]);
}
}
return tokens;
}
/************************************************************************ /************************************************************************
* Public Mutators * Public Mutators
***********************************************************************/ ***********************************************************************/

View File

@ -27,6 +27,28 @@ class StringToken {
std::vector<std::string> split(const char* delim) const; std::vector<std::string> split(const char* delim) const;
std::vector<std::string> split(const std::vector<char>& delim) const; std::vector<std::string> split(const std::vector<char>& delim) const;
std::vector<std::string> split(); std::vector<std::string> split();
/** @brief Find every position (0-based character index) at which a given
* delimiter occurs in the string.
*
* For example, to find the positions of all the quotes (") in the string:
*   "we" are good
* the following code is suggested:
*   StringToken tokenizer("\"we\" are good");
*   std::vector<size_t> anchors = tokenizer.find_positions('\"');
* The following vector will be returned: [0, 3]
*
* @param delim The character to look for
* @return The positions of all the occurrences, in ascending order. The vector
* is empty when the delimiter is not found */
std::vector<size_t> find_positions(const char& delim) const;
/** @brief Split the string chunk by chunk. This is useful when some
* substrings (chunks), enclosed by a chunk delimiter, should be kept in one
* piece instead of being split further.
*
* The string is first cut at every chunk delimiter. The resulting chunks are
* indexed from 0 in their order of appearance. Chunks of one index parity are
* split again into tokens, while chunks of the other parity are returned
* as they are.
*
* For example, to split the following string, where the text between the two
* quotes (") should not be split:
*   source "cmdA --opt1 val1;cmdB --opt2 val2" --verbose
* the following code is suggested:
*   StringToken tokenizer(
*     "source \"cmdA --opt1 val1;cmdB --opt2 val2\" --verbose");
*   std::vector<std::string> tokens = tokenizer.split_by_chunks('\"');
* The following vector will be returned:
*   ["source" "cmdA --opt1 val1;cmdB --opt2 val2" "--verbose"]
* Here the quoted text is the chunk with index 1, so ``split_odd_chunk`` must
* stay false for it to be kept in one piece.
*
* @param chunk_delim The character which marks the boundaries of the chunks
* @param split_odd_chunk When false (default), the chunks with an even index
* (0, 2, ...) are split further and the chunks with an odd index are kept in
* one piece. When true, the two roles are swapped.
* @return The resulting tokens, in their order of appearance
*
* .. note:: The option ``split_odd_chunk`` is useful when the chunk delimiter
* appears at the beginning of the string. Presumably the text to be kept in
* one piece then becomes the chunk with index 0; this depends on how split()
* treats a leading delimiter (to be confirmed).
*/
std::vector<std::string> split_by_chunks(
const char& chunk_delim, const bool& split_odd_chunk = false) const;
public: /* Public Mutators */ public: /* Public Mutators */
void set_data(const std::string& data); void set_data(const std::string& data);