From f75bc6c7aac92c7c1c8954ec7fe5325a94e6e491 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 13 Dec 2020 18:16:55 +0000 Subject: [PATCH] cxxrtl: disable optimization of debug_items(). Implementing outlining has greatly increased the amount of debug information in a typical build, and consequently exposed performance issues in C++ compilers, which are similar for both GCC and Clang; the compile time of Minerva SoC SRAM increased almost twofold. Although one would expect the slowdown to be caused by the increased use of templates in `debug_eval()`, it is actually almost entirely attributable to optimizations and codegen for `debug_items()`. Fortunately, it is neither possible nor desirable to optimize `debug_items()`: in most cases it is called exactly once, and its body is a linear sequence of calls with unique arguments. This commit turns off optimizations for `debug_items()` on GCC and Clang, improving -Os compile time of Minerva SoC SRAM by ~40% (!) --- backends/cxxrtl/cxxrtl.h | 17 ++++++++++++++--- backends/cxxrtl/cxxrtl_backend.cc | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index 59393e415..3c315c7df 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -41,18 +41,29 @@ #include +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector. // It generates a lot of specialized template functions with relatively large bodies that, when inlined // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities. // Because of this, most of the CXXRTL runtime must be always inlined for best performance. -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif #if __has_attribute(always_inline) #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__)) #else #define CXXRTL_ALWAYS_INLINE inline #endif +// Conversely, some functions in the generated code are extremely large yet very cold, with both of these +// properties being extreme enough to confuse C++ compilers into spending pathological amounts of time +// on a futile (the code becomes worse) attempt to optimize the least important parts of code. +#if __has_attribute(optnone) +#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__)) +#elif __has_attribute(optimize) +#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0))) +#else +#define CXXRTL_EXTREMELY_COLD +#endif // CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior // of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at diff --git a/backends/cxxrtl/cxxrtl_backend.cc b/backends/cxxrtl/cxxrtl_backend.cc index fa19a8dd6..7bf44626a 100644 --- a/backends/cxxrtl/cxxrtl_backend.cc +++ b/backends/cxxrtl/cxxrtl_backend.cc @@ -2038,6 +2038,7 @@ struct CxxrtlWorker { f << indent << "}\n"; f << "\n"; } + f << indent << "CXXRTL_EXTREMELY_COLD\n"; f << indent << "void " << mangle(module) << "::debug_info(debug_items &items, std::string path) {\n"; dump_debug_info_method(module); f << indent << "}\n";