71 lines
2.9 KiB
C++
71 lines
2.9 KiB
C++
#ifndef OPENFPGA_TOKENIZER_H
|
|
#define OPENFPGA_TOKENIZER_H
|
|
|
|
/********************************************************************
 * Include header files that are required by the data structure declaration
 *******************************************************************/
|
|
#include <cstddef>
#include <string>
#include <vector>
|
|
|
|
/* namespace openfpga begins */
|
|
namespace openfpga {
|
|
|
|
/************************************************************************
 * This file declares a tokenizer for string objects.
 * It splits a string at the given delimiters and returns a vector of tokens.
 * Different delimiters can be supplied when splitting strings.
 ***********************************************************************/
|
|
|
|
/**
 * @brief Tokenizer that holds a string and splits it into tokens.
 *
 * The object stores the string by value together with a list of delimiter
 * characters (see the private data members). Only declarations are visible
 * here; every behaviour note below that is marked "presumably" is inferred
 * from names and must be confirmed against the implementation file.
 */
class StringToken {
 public: /* Constructors */
  /**
   * @brief Create a tokenizer for the given string.
   * @param data String to be tokenized.
   * @note NOTE(review): the constructor is not declared explicit, so a
   *       std::string converts implicitly to a StringToken. Left unchanged so
   *       that existing callers keep compiling.
   */
  StringToken(const std::string& data);

 public: /* Public Accessors */
  /**
   * @brief Return the string held by the tokenizer.
   * @return A copy of the string (presumably the internal data_ member).
   */
  std::string data() const;

  /**
   * @brief Split the string with the delimiters given as a string.
   * @param delims Delimiters; presumably every character of the string is
   *        treated as one delimiter (TODO confirm in the implementation).
   * @return The tokens found in the string.
   */
  std::vector<std::string> split(const std::string& delims) const;

  /**
   * @brief Split the string with a single delimiter character.
   * @param delim Delimiter character.
   * @return The tokens found in the string.
   */
  std::vector<std::string> split(const char& delim) const;

  /**
   * @brief Split the string with the delimiters given as a C string.
   * @param delim Delimiters; presumably a null-terminated list of delimiter
   *        characters (TODO confirm in the implementation).
   * @return The tokens found in the string.
   */
  std::vector<std::string> split(const char* delim) const;

  /**
   * @brief Split the string with a list of delimiter characters.
   * @param delim Delimiter characters.
   * @return The tokens found in the string.
   */
  std::vector<std::string> split(const std::vector<char>& delim) const;

  /**
   * @brief Split the string without passing delimiters explicitly.
   *
   * Presumably uses the delimiters stored in the object (those registered
   * with add_delim(), or defaults when none were registered) - TODO confirm.
   * Unlike the other overloads this one is not const.
   * @return The tokens found in the string.
   */
  std::vector<std::string> split();

  /**
   * @brief Find the positions (index of the i-th character) at which a given
   *        delimiter occurs in the string.
   *
   * For example, to find the position of all quotes (") in the string
   * @verbatim "we" are good @endverbatim the following code is suggested:
   * @code
   *   StringToken tokenizer("\"we\" are good");
   *   std::vector<size_t> anchors = tokenizer.find_positions('\"');
   * @endcode
   * The following vector will be returned: [0, 3]
   *
   * @param delim Character to look for.
   * @return List of positions of the delimiter in the string.
   */
  std::vector<std::size_t> find_positions(const char& delim) const;

  /**
   * @brief Split the string chunk by chunk. This is useful when some chunks
   *        of the string must not be split by the given delimiter.
   *
   * For example, to split the string
   * @verbatim source "cmdA --opt1 val1;cmdB --opt2 val2" --verbose @endverbatim
   * where the text between the two quotes (") must not be split, the
   * following code is suggested:
   * @code
   *   StringToken tokenizer(
   *     "source \"cmdA --opt1 val1;cmdB --opt2 val2\" --verbose");
   *   std::vector<std::string> tokens =
   *     tokenizer.split_by_chunks('\"', true);
   * @endcode
   * The following vector will be returned:
   *   ["source" "cmdA --opt1 val1;cmdB --opt2 val2" "--verbose"]
   *
   * .. note:: The option ``split_odd_chunk`` is useful when the chunk delimiter
   *   appears at the beginning of the string.
   *
   * @param chunk_delim Character that marks the boundaries of a chunk.
   * @param split_odd_chunk See the note above; defaults to false.
   * @return The tokens found in the string.
   */
  std::vector<std::string> split_by_chunks(
    const char& chunk_delim, const bool& split_odd_chunk = false) const;

 public: /* Public Mutators */
  /**
   * @brief Replace the string held by the tokenizer.
   * @param data New string to be tokenized.
   */
  void set_data(const std::string& data);

  /**
   * @brief Register a delimiter character.
   * @param delim Delimiter; presumably stored in delims_ and used by the
   *        argument-less split() (TODO confirm in the implementation).
   */
  void add_delim(const char& delim);

  /**
   * @brief Trim the left (leading) side of the string.
   * @param sensitive_word Presumably the set of characters, or the word, to
   *        strip from the beginning of the string (TODO confirm).
   */
  void ltrim(const std::string& sensitive_word);

  /**
   * @brief Trim the right (trailing) side of the string.
   * @param sensitive_word Presumably the set of characters, or the word, to
   *        strip from the end of the string (TODO confirm).
   */
  void rtrim(const std::string& sensitive_word);

  /**
   * @brief Trim the string; presumably strips leading and trailing
   *        whitespace (TODO confirm in the implementation).
   */
  void trim();

 private: /* Private Mutators */
  /**
   * @brief Add the default delimiter(s); which characters these are is
   *        decided by the implementation file.
   */
  void add_default_delim();

 private:             /* Internal data */
  std::string data_;  /* String to be split */
  std::vector<char> delims_; /* Delimiter characters kept by the object */
};
|
|
|
|
} // namespace openfpga
|
|
|
|
#endif
|