mirror of https://github.com/YosysHQ/yosys.git
fmt,cxxrtl: add `UNICHAR` format type.
This format type is used to print a Unicode character (code point) as its UTF-8 serialization. To this end, two UTF-8 encoders (one for fmt, one for cxxrtl) are added for rendering. When converted to a Verilog format specifier, `UNICHAR` degrades to `%c` with the low 7 bits of the code point, which has equivalent behavior for inputs not exceeding ASCII. (SystemVerilog leaves source and display encodings completely undefined.)
This commit is contained in:
parent
1780e2eb1e
commit
bf5a960668
|
@ -1013,13 +1013,14 @@ struct fmt_part {
|
|||
LITERAL = 0,
|
||||
INTEGER = 1,
|
||||
STRING = 2,
|
||||
VLOG_TIME = 3,
|
||||
UNICHAR = 3,
|
||||
VLOG_TIME = 4,
|
||||
} type;
|
||||
|
||||
// LITERAL type
|
||||
std::string str;
|
||||
|
||||
// INTEGER/STRING types
|
||||
// INTEGER/STRING/UNICHAR types
|
||||
// + value<Bits> val;
|
||||
|
||||
// INTEGER/STRING/VLOG_TIME types
|
||||
|
@ -1073,6 +1074,25 @@ struct fmt_part {
|
|||
break;
|
||||
}
|
||||
|
||||
case UNICHAR: {
|
||||
uint32_t codepoint = val.template get<uint32_t>();
|
||||
if (codepoint >= 0x10000)
|
||||
buf += (char)(0xf0 | (codepoint >> 18));
|
||||
else if (codepoint >= 0x800)
|
||||
buf += (char)(0xe0 | (codepoint >> 12));
|
||||
else if (codepoint >= 0x80)
|
||||
buf += (char)(0xc0 | (codepoint >> 6));
|
||||
else
|
||||
buf += (char)codepoint;
|
||||
if (codepoint >= 0x10000)
|
||||
buf += (char)(0x80 | ((codepoint >> 12) & 0x3f));
|
||||
if (codepoint >= 0x800)
|
||||
buf += (char)(0x80 | ((codepoint >> 6) & 0x3f));
|
||||
if (codepoint >= 0x80)
|
||||
buf += (char)(0x80 | ((codepoint >> 0) & 0x3f));
|
||||
break;
|
||||
}
|
||||
|
||||
case INTEGER: {
|
||||
size_t width = Bits;
|
||||
if (base != 10) {
|
||||
|
|
|
@ -42,9 +42,9 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
|
|||
} else if (fmt.substr(i, 2) == "{{") {
|
||||
part.str += '{';
|
||||
++i;
|
||||
} else if (fmt[i] == '}')
|
||||
} else if (fmt[i] == '}') {
|
||||
log_assert(false && "Unexpected '}' in format string");
|
||||
else if (fmt[i] == '{') {
|
||||
} else if (fmt[i] == '{') {
|
||||
if (!part.str.empty()) {
|
||||
part.type = FmtPart::LITERAL;
|
||||
parts.push_back(part);
|
||||
|
@ -74,6 +74,12 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
|
|||
part.sig = args.extract(0, arg_size);
|
||||
args.remove(0, arg_size);
|
||||
|
||||
if (fmt[i] == 'U') {
|
||||
part.type = FmtPart::UNICHAR;
|
||||
++i;
|
||||
goto success;
|
||||
}
|
||||
|
||||
if (fmt[i] == '>')
|
||||
part.justify = FmtPart::RIGHT;
|
||||
else if (fmt[i] == '<')
|
||||
|
@ -156,6 +162,7 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
|
|||
log_assert(false && "Unexpected end in format substitution");
|
||||
}
|
||||
|
||||
success:
|
||||
if (fmt[i] != '}')
|
||||
log_assert(false && "Expected '}' after format substitution");
|
||||
|
||||
|
@ -188,6 +195,11 @@ void Fmt::emit_rtlil(RTLIL::Cell *cell) const {
|
|||
}
|
||||
break;
|
||||
|
||||
case FmtPart::UNICHAR:
|
||||
log_assert(part.sig.size() <= 32);
|
||||
fmt += "{U}";
|
||||
break;
|
||||
|
||||
case FmtPart::VLOG_TIME:
|
||||
log_assert(part.sig.size() == 0);
|
||||
YS_FALLTHROUGH
|
||||
|
@ -568,6 +580,16 @@ std::vector<VerilogFmtArg> Fmt::emit_verilog() const
|
|||
break;
|
||||
}
|
||||
|
||||
case FmtPart::UNICHAR: {
|
||||
VerilogFmtArg arg;
|
||||
arg.type = VerilogFmtArg::INTEGER;
|
||||
arg.sig = part.sig.extract(0, 7); // only ASCII
|
||||
args.push_back(arg);
|
||||
|
||||
fmt.str += "%c";
|
||||
break;
|
||||
}
|
||||
|
||||
case FmtPart::VLOG_TIME: {
|
||||
VerilogFmtArg arg;
|
||||
arg.type = VerilogFmtArg::TIME;
|
||||
|
@ -630,6 +652,7 @@ void Fmt::emit_cxxrtl(std::ostream &os, std::string indent, std::function<void(c
|
|||
case FmtPart::LITERAL: os << "LITERAL"; break;
|
||||
case FmtPart::INTEGER: os << "INTEGER"; break;
|
||||
case FmtPart::STRING: os << "STRING"; break;
|
||||
case FmtPart::UNICHAR: os << "UNICHAR"; break;
|
||||
case FmtPart::VLOG_TIME: os << "VLOG_TIME"; break;
|
||||
}
|
||||
os << ", ";
|
||||
|
@ -671,6 +694,26 @@ std::string Fmt::render() const
|
|||
str += part.str;
|
||||
break;
|
||||
|
||||
case FmtPart::UNICHAR: {
|
||||
RTLIL::Const value = part.sig.as_const();
|
||||
uint32_t codepoint = value.as_int();
|
||||
if (codepoint >= 0x10000)
|
||||
str += (char)(0xf0 | (codepoint >> 18));
|
||||
else if (codepoint >= 0x800)
|
||||
str += (char)(0xe0 | (codepoint >> 12));
|
||||
else if (codepoint >= 0x80)
|
||||
str += (char)(0xc0 | (codepoint >> 6));
|
||||
else
|
||||
str += (char)codepoint;
|
||||
if (codepoint >= 0x10000)
|
||||
str += (char)(0x80 | ((codepoint >> 12) & 0x3f));
|
||||
if (codepoint >= 0x800)
|
||||
str += (char)(0x80 | ((codepoint >> 6) & 0x3f));
|
||||
if (codepoint >= 0x80)
|
||||
str += (char)(0x80 | ((codepoint >> 0) & 0x3f));
|
||||
break;
|
||||
}
|
||||
|
||||
case FmtPart::INTEGER:
|
||||
case FmtPart::STRING:
|
||||
case FmtPart::VLOG_TIME: {
|
||||
|
|
|
@ -56,13 +56,14 @@ struct FmtPart {
|
|||
LITERAL = 0,
|
||||
INTEGER = 1,
|
||||
STRING = 2,
|
||||
VLOG_TIME = 3,
|
||||
UNICHAR = 3,
|
||||
VLOG_TIME = 4,
|
||||
} type;
|
||||
|
||||
// LITERAL type
|
||||
std::string str;
|
||||
|
||||
// INTEGER/STRING types
|
||||
// INTEGER/STRING/UNICHAR types
|
||||
RTLIL::SigSpec sig;
|
||||
|
||||
// INTEGER/STRING/VLOG_TIME types
|
||||
|
|
Loading…
Reference in New Issue