From 8634f3e2d845ff1dbe0c7d999ed5382afd4a5ba8 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Mon, 26 Apr 2021 22:38:24 -0700 Subject: [PATCH 01/16] drop in some new functions that handle reading bitcode.ll and inline.ll --- src/SchemeFFI.cpp | 107 ++++++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index f2828161b..90d603f19 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -184,7 +184,60 @@ void initSchemeFFI(scheme* sc) } } -static long long llvm_emitcounter = 0; +static std::string fileToString(const std::string &fileName) { + std::ifstream inStream(fileName); + std::stringstream inString; + inString << inStream.rdbuf(); + return inString.str(); +} + +static const std::string inlineDotLLString() { +#ifdef DYLIB + auto fs = cmrc::xtm::get_filesystem(); + auto data = fs.open("runtime/inline.ll"); + static const std::string sInlineDotLLString(data.begin(), data.end()); +#else + static const std::string sInlineDotLLString( + fileToString(UNIV::SHARE_DIR + "/runtime/inline.ll")); +#endif + + return sInlineDotLLString; +} + +static const std::string bitcodeDotLLString() { +#ifdef DYLIB + auto fs = cmrc::xtm::get_filesystem(); + auto data = fs.open("runtime/bitcode.ll"); + static const std::string sBitcodeDotLLString(data.begin(), data.end()); +#else + static const std::string sBitcodeDotLLString( + fileToString(UNIV::SHARE_DIR + "/runtime/bitcode.ll")); +#endif + + return sBitcodeDotLLString; +} + +// match @symbols @like @this_123 +static const std::regex sGlobalSymRegex( + "[ \t]@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", + std::regex::optimize); + +// match "define @sym" +static const std::regex sDefineSymRegex( + "define[^\\n]+@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", + std::regex::optimize | std::regex::ECMAScript); + +// template is temporary, we'll remove this once the refactoring is done +template +static void insertMatchingSymbols( + const std::string &code, const 
std::regex &regex, + // std::unordered_set &containingSet + T &containingSet) +{ + std::copy(std::sregex_token_iterator(code.begin(), code.end(), regex, 1), + std::sregex_token_iterator(), + std::inserter(containingSet, containingSet.begin())); +} static std::string SanitizeType(llvm::Type* Type) { @@ -199,9 +252,6 @@ static std::string SanitizeType(llvm::Type* Type) return str; } -static std::regex sGlobalSymRegex("[ \t]@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", std::regex::optimize); -static std::regex sDefineSymRegex("define[^\\n]+@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", std::regex::optimize | std::regex::ECMAScript); - static llvm::Module* jitCompile(const std::string& String) { // Create some module to put our function into it. @@ -209,9 +259,6 @@ static llvm::Module* jitCompile(const std::string& String) legacy::PassManager* PM = extemp::EXTLLVM::PM; legacy::PassManager* PM_NO = extemp::EXTLLVM::PM_NO; - char modname[256]; - sprintf(modname, "xtmmodule_%lld", ++llvm_emitcounter); - std::string asmcode(String); SMDiagnostic pa; @@ -219,38 +266,12 @@ static llvm::Module* jitCompile(const std::string& String) static std::string sInlineBitcode; static std::unordered_set sInlineSyms; -#ifdef DYLIB - auto fs = cmrc::xtm::get_filesystem(); -#endif - if (sInlineString.empty()) { - { -#ifdef DYLIB - auto data = fs.open("runtime/bitcode.ll"); - sInlineString = std::string(data.begin(), data.end()); -#else - std::ifstream inStream(UNIV::SHARE_DIR + "/runtime/bitcode.ll"); - std::stringstream inString; - inString << inStream.rdbuf(); - sInlineString = inString.str(); -#endif - } - std::copy(std::sregex_token_iterator(sInlineString.begin(), sInlineString.end(), sGlobalSymRegex, 1), - std::sregex_token_iterator(), std::inserter(sInlineSyms, sInlineSyms.begin())); - { -#ifdef DYLIB - auto data = fs.open("runtime/inline.ll"); - std::string tString = std::string(data.begin(), data.end()); -#else - std::ifstream inStream(UNIV::SHARE_DIR + "/runtime/inline.ll"); - std::stringstream inString; - 
inString << inStream.rdbuf(); - std::string tString = inString.str(); -#endif - std::copy(std::sregex_token_iterator(tString.begin(), tString.end(), sGlobalSymRegex, 1), - std::sregex_token_iterator(), std::inserter(sInlineSyms, sInlineSyms.begin())); - } + sInlineString = bitcodeDotLLString(); + insertMatchingSymbols(sInlineString, sGlobalSymRegex, sInlineSyms); + insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, sInlineSyms); } + if (sInlineBitcode.empty()) { // need to avoid parsing the types twice static bool first(true); @@ -260,17 +281,9 @@ static llvm::Module* jitCompile(const std::string& String) std::string bitcode; llvm::raw_string_ostream bitstream(sInlineBitcode); llvm::WriteBitcodeToFile(newModule.get(), bitstream); -#ifdef DYLIB - auto data = fs.open("runtime/inline.ll"); - sInlineString = std::string(data.begin(), data.end()); -#else - std::ifstream inStream(UNIV::SHARE_DIR + "/runtime/inline.ll"); - std::stringstream inString; - inString << inStream.rdbuf(); - sInlineString = inString.str(); -#endif + sInlineString = inlineDotLLString(); } else { -std::cout << pa.getMessage().str() << std::endl; + std::cout << pa.getMessage().str() << std::endl; abort(); } } else { From db8b0f000c9ee4922baa7a02eeb0a89c908be558 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Mon, 26 Apr 2021 22:45:08 -0700 Subject: [PATCH 02/16] and pull out a function for creating bitcode --- src/SchemeFFI.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 90d603f19..fbeab7d85 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -217,6 +217,19 @@ static const std::string bitcodeDotLLString() { return sBitcodeDotLLString; } +static std::string IRToBitcode(const std::string &ir) { + std::string bitcode; + llvm::SMDiagnostic pa; + auto mod(llvm::parseAssemblyString(ir, pa, llvm::getGlobalContext())); + if (!mod) { + pa.print("IRToBitcode", llvm::outs()); + std::abort(); + } 
+ llvm::raw_string_ostream bitstream(bitcode); + llvm::WriteBitcodeToFile(mod.get(), bitstream); + return bitcode; +} + // match @symbols @like @this_123 static const std::regex sGlobalSymRegex( "[ \t]@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", @@ -276,16 +289,8 @@ static llvm::Module* jitCompile(const std::string& String) // need to avoid parsing the types twice static bool first(true); if (!first) { - auto newModule(parseAssemblyString(sInlineString, pa, getGlobalContext())); - if (newModule) { - std::string bitcode; - llvm::raw_string_ostream bitstream(sInlineBitcode); - llvm::WriteBitcodeToFile(newModule.get(), bitstream); - sInlineString = inlineDotLLString(); - } else { - std::cout << pa.getMessage().str() << std::endl; - abort(); - } + sInlineBitcode = IRToBitcode(sInlineString); + sInlineString = inlineDotLLString(); } else { first = false; } From d2735193975d485adfc298b17f74746717e283b1 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Mon, 26 Apr 2021 22:49:34 -0700 Subject: [PATCH 03/16] replace some more std::copy calls --- src/SchemeFFI.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index fbeab7d85..c7bb20bc4 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -297,15 +297,20 @@ static llvm::Module* jitCompile(const std::string& String) } std::unique_ptr newModule; std::vector symbols; - std::copy(std::sregex_token_iterator(asmcode.begin(), asmcode.end(), sGlobalSymRegex, 1), - std::sregex_token_iterator(), std::inserter(symbols, symbols.begin())); + + // Copy all @symbols @like @this into symbols + insertMatchingSymbols(asmcode, sGlobalSymRegex, symbols); + std::sort(symbols.begin(), symbols.end()); auto end(std::unique(symbols.begin(), symbols.end())); + std::unordered_set ignoreSyms; - std::copy(std::sregex_token_iterator(asmcode.begin(), asmcode.end(), sDefineSymRegex, 1), - std::sregex_token_iterator(), std::inserter(ignoreSyms, ignoreSyms.begin())); + 
insertMatchingSymbols(asmcode, sDefineSymRegex, ignoreSyms); + std::string declarations; llvm::raw_string_ostream dstream(declarations); + + // Iterating over all @symbols @in @asmcode matching sGlobalSymRegex for (auto iter = symbols.begin(); iter != end; ++iter) { const char* sym(iter->c_str()); if (sInlineSyms.find(sym) != sInlineSyms.end() || ignoreSyms.find(sym) != ignoreSyms.end()) { @@ -315,7 +320,7 @@ static llvm::Module* jitCompile(const std::string& String) if (!gv) { continue; } - auto func(llvm::dyn_cast(gv)); + const llvm::Function* func(llvm::dyn_cast(gv)); if (func) { dstream << "declare " << SanitizeType(func->getReturnType()) << " @" << sym << " ("; bool first(true); From 767b24a7fa07b1d44cc2880e533962b044afcbd7 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Mon, 26 Apr 2021 22:51:33 -0700 Subject: [PATCH 04/16] minor cleanup --- src/SchemeFFI.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index c7bb20bc4..546c061c1 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -341,14 +341,15 @@ static llvm::Module* jitCompile(const std::string& String) dstream << '@' << sym << " = external global " << str.substr(0, str.length() - 1) << '\n'; } } -// std::cout << "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std::endl; + + // std::cout << "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std::endl; if (!sInlineBitcode.empty()) { auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), getGlobalContext())); if (likely(modOrErr)) { newModule = std::move(modOrErr.get()); asmcode = sInlineString + dstream.str() + asmcode; if (parseAssemblyInto(llvm::MemoryBufferRef(asmcode, ""), *newModule, pa)) { -std::cout << "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std::endl; + std::cout << "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std::endl; newModule.reset(); } } @@ -365,11 +366,11 @@ std::cout 
<< "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std PM_NO->run(*newModule); } } - //std::stringstream ss; + if (unlikely(!newModule)) { -// std::cout << "**** CODE ****\n" << asmcode << " **** ENDCODE ****" << std::endl; -// std::cout << pa.getMessage().str() << std::endl << pa.getLineNo() << std::endl; + // std::cout << "**** CODE ****\n" << asmcode << " **** ENDCODE ****" << std::endl; + // std::cout << pa.getMessage().str() << std::endl << pa.getLineNo() << std::endl; std::string errstr; llvm::raw_string_ostream ss(errstr); pa.print("LLVM IR",ss); From cd5687b5f82e0404a75883745b7ca4272dc1bade Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:20:33 -0700 Subject: [PATCH 05/16] pull out the code that declares globals --- src/SchemeFFI.cpp | 94 +++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 546c061c1..21dbab831 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -265,50 +265,20 @@ static std::string SanitizeType(llvm::Type* Type) return str; } -static llvm::Module* jitCompile(const std::string& String) -{ - // Create some module to put our function into it. 
- using namespace llvm; - legacy::PassManager* PM = extemp::EXTLLVM::PM; - legacy::PassManager* PM_NO = extemp::EXTLLVM::PM_NO; - - std::string asmcode(String); - SMDiagnostic pa; - - static std::string sInlineString; // This is a hack for now, but it *WORKS* - static std::string sInlineBitcode; - static std::unordered_set sInlineSyms; - - if (sInlineString.empty()) { - sInlineString = bitcodeDotLLString(); - insertMatchingSymbols(sInlineString, sGlobalSymRegex, sInlineSyms); - insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, sInlineSyms); - } - - if (sInlineBitcode.empty()) { - // need to avoid parsing the types twice - static bool first(true); - if (!first) { - sInlineBitcode = IRToBitcode(sInlineString); - sInlineString = inlineDotLLString(); - } else { - first = false; - } - } - std::unique_ptr newModule; +static std::string +globalDeclarations(const std::string &asmcode, + const std::unordered_set &sInlineSyms) { std::vector symbols; - // Copy all @symbols @like @this into symbols insertMatchingSymbols(asmcode, sGlobalSymRegex, symbols); - std::sort(symbols.begin(), symbols.end()); auto end(std::unique(symbols.begin(), symbols.end())); std::unordered_set ignoreSyms; insertMatchingSymbols(asmcode, sDefineSymRegex, ignoreSyms); - std::string declarations; - llvm::raw_string_ostream dstream(declarations); + + std::stringstream ss; // Iterating over all @symbols @in @asmcode matching sGlobalSymRegex for (auto iter = symbols.begin(); iter != end; ++iter) { @@ -322,34 +292,70 @@ static llvm::Module* jitCompile(const std::string& String) } const llvm::Function* func(llvm::dyn_cast(gv)); if (func) { - dstream << "declare " << SanitizeType(func->getReturnType()) << " @" << sym << " ("; + ss << "declare " << SanitizeType(func->getReturnType()) << " @" << sym << " ("; bool first(true); for (const auto& arg : func->getArgumentList()) { if (!first) { - dstream << ", "; + ss << ", "; } else { first = false; } - dstream << SanitizeType(arg.getType()); + ss << 
SanitizeType(arg.getType()); } if (func->isVarArg()) { - dstream << ", ..."; + ss << ", ..."; } - dstream << ")\n"; + ss << ")\n"; } else { auto str(SanitizeType(gv->getType())); - dstream << '@' << sym << " = external global " << str.substr(0, str.length() - 1) << '\n'; + ss << '@' << sym << " = external global " << str.substr(0, str.length() - 1) << '\n'; } } - // std::cout << "**** DECL ****\n" << dstream.str() << "**** ENDDECL ****\n" << std::endl; + return ss.str(); +} + +static llvm::Module* jitCompile(const std::string& String) +{ + // Create some module to put our function into it. + using namespace llvm; + legacy::PassManager* PM = extemp::EXTLLVM::PM; + legacy::PassManager* PM_NO = extemp::EXTLLVM::PM_NO; + + std::string asmcode(String); + SMDiagnostic pa; + + static std::string sInlineString; // This is a hack for now, but it *WORKS* + static std::string sInlineBitcode; + static std::unordered_set sInlineSyms; + + if (sInlineString.empty()) { + sInlineString = bitcodeDotLLString(); + insertMatchingSymbols(sInlineString, sGlobalSymRegex, sInlineSyms); + insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, sInlineSyms); + } + + if (sInlineBitcode.empty()) { + // need to avoid parsing the types twice + static bool first(true); + if (!first) { + sInlineBitcode = IRToBitcode(sInlineString); + sInlineString = inlineDotLLString(); + } else { + first = false; + } + } + std::unique_ptr newModule; + std::string declarations = globalDeclarations(asmcode, sInlineSyms); + + // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; if (!sInlineBitcode.empty()) { auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), getGlobalContext())); if (likely(modOrErr)) { newModule = std::move(modOrErr.get()); - asmcode = sInlineString + dstream.str() + asmcode; + asmcode = sInlineString + declarations + asmcode; if (parseAssemblyInto(llvm::MemoryBufferRef(asmcode, ""), *newModule, pa)) { - std::cout << "**** DECL ****\n" 
<< dstream.str() << "**** ENDDECL ****\n" << std::endl; + std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; newModule.reset(); } } From f7ae34d714ceb64194df19244f2f63cbc4087166 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:25:10 -0700 Subject: [PATCH 06/16] remove sInlineString --- src/SchemeFFI.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 21dbab831..6b2527fd0 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -325,22 +325,21 @@ static llvm::Module* jitCompile(const std::string& String) std::string asmcode(String); SMDiagnostic pa; - static std::string sInlineString; // This is a hack for now, but it *WORKS* static std::string sInlineBitcode; static std::unordered_set sInlineSyms; - if (sInlineString.empty()) { - sInlineString = bitcodeDotLLString(); - insertMatchingSymbols(sInlineString, sGlobalSymRegex, sInlineSyms); + static bool loadedInlineSyms(false); + if (!loadedInlineSyms) { + insertMatchingSymbols(bitcodeDotLLString(), sGlobalSymRegex, sInlineSyms); insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, sInlineSyms); + loadedInlineSyms = true; } if (sInlineBitcode.empty()) { // need to avoid parsing the types twice static bool first(true); if (!first) { - sInlineBitcode = IRToBitcode(sInlineString); - sInlineString = inlineDotLLString(); + sInlineBitcode = IRToBitcode(bitcodeDotLLString()); } else { first = false; } @@ -353,7 +352,7 @@ static llvm::Module* jitCompile(const std::string& String) auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), getGlobalContext())); if (likely(modOrErr)) { newModule = std::move(modOrErr.get()); - asmcode = sInlineString + declarations + asmcode; + asmcode = inlineDotLLString() + declarations + asmcode; if (parseAssemblyInto(llvm::MemoryBufferRef(asmcode, ""), *newModule, pa)) { std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL 
****\n" << std::endl; newModule.reset(); From 3480862e331e212c82d5d993e09b557d0f5bb25f Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:27:39 -0700 Subject: [PATCH 07/16] refactor sInlineSyms --- src/SchemeFFI.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 6b2527fd0..213365fb0 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -265,9 +265,16 @@ static std::string SanitizeType(llvm::Type* Type) return str; } +static std::unordered_set loadInlineSyms() { + std::unordered_set inlineSyms; + insertMatchingSymbols(bitcodeDotLLString(), sGlobalSymRegex, inlineSyms); + insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, inlineSyms); + return inlineSyms; +} + static std::string -globalDeclarations(const std::string &asmcode, - const std::unordered_set &sInlineSyms) { +globalDeclarations(const std::string &asmcode) { + static std::unordered_set sInlineSyms(loadInlineSyms()); std::vector symbols; // Copy all @symbols @like @this into symbols insertMatchingSymbols(asmcode, sGlobalSymRegex, symbols); @@ -326,14 +333,6 @@ static llvm::Module* jitCompile(const std::string& String) SMDiagnostic pa; static std::string sInlineBitcode; - static std::unordered_set sInlineSyms; - - static bool loadedInlineSyms(false); - if (!loadedInlineSyms) { - insertMatchingSymbols(bitcodeDotLLString(), sGlobalSymRegex, sInlineSyms); - insertMatchingSymbols(inlineDotLLString(), sGlobalSymRegex, sInlineSyms); - loadedInlineSyms = true; - } if (sInlineBitcode.empty()) { // need to avoid parsing the types twice @@ -345,7 +344,7 @@ static llvm::Module* jitCompile(const std::string& String) } } std::unique_ptr newModule; - std::string declarations = globalDeclarations(asmcode, sInlineSyms); + std::string declarations = globalDeclarations(asmcode); // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; if (!sInlineBitcode.empty()) { From 
1e8bd847a2d6969253ffdf5fcc7f0bfddb0144d6 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:29:46 -0700 Subject: [PATCH 08/16] use explicit llvm:: namespace --- src/SchemeFFI.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 213365fb0..770084d98 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -324,13 +324,8 @@ globalDeclarations(const std::string &asmcode) { static llvm::Module* jitCompile(const std::string& String) { - // Create some module to put our function into it. - using namespace llvm; - legacy::PassManager* PM = extemp::EXTLLVM::PM; - legacy::PassManager* PM_NO = extemp::EXTLLVM::PM_NO; - std::string asmcode(String); - SMDiagnostic pa; + llvm::SMDiagnostic pa; static std::string sInlineBitcode; @@ -343,12 +338,14 @@ static llvm::Module* jitCompile(const std::string& String) first = false; } } + + // Create some module to put our function into it. std::unique_ptr newModule; std::string declarations = globalDeclarations(asmcode); // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; if (!sInlineBitcode.empty()) { - auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), getGlobalContext())); + auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), llvm::getGlobalContext())); if (likely(modOrErr)) { newModule = std::move(modOrErr.get()); asmcode = inlineDotLLString() + declarations + asmcode; @@ -358,16 +355,16 @@ static llvm::Module* jitCompile(const std::string& String) } } } else { - newModule = parseAssemblyString(asmcode, pa, getGlobalContext()); + newModule = parseAssemblyString(asmcode, pa, llvm::getGlobalContext()); } if (newModule) { if (unlikely(!extemp::UNIV::ARCH.empty())) { newModule->setTargetTriple(extemp::UNIV::ARCH); } if (EXTLLVM::OPTIMIZE_COMPILES) { - PM->run(*newModule); + extemp::EXTLLVM::PM->run(*newModule); } else { - PM_NO->run(*newModule); + 
extemp::EXTLLVM::PM_NO->run(*newModule); } } From f2da37f82c08bb7819d280ba1a7cdb8b52cfe178 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:32:46 -0700 Subject: [PATCH 09/16] be a little more explicit about prepending bitcode/ir --- src/SchemeFFI.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 770084d98..8f9b2d904 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -327,24 +327,18 @@ static llvm::Module* jitCompile(const std::string& String) std::string asmcode(String); llvm::SMDiagnostic pa; - static std::string sInlineBitcode; - - if (sInlineBitcode.empty()) { - // need to avoid parsing the types twice - static bool first(true); - if (!first) { - sInlineBitcode = IRToBitcode(bitcodeDotLLString()); - } else { - first = false; - } - } + static std::string sInlineBitcode(IRToBitcode(bitcodeDotLLString())); // Create some module to put our function into it. std::unique_ptr newModule; std::string declarations = globalDeclarations(asmcode); // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; - if (!sInlineBitcode.empty()) { + + // The first file we compile is init.ll, and we don't want to prepend inline.ll, or any global + // declarations to it. 
+ static bool shouldPrepend(false); + if (shouldPrepend) { auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), llvm::getGlobalContext())); if (likely(modOrErr)) { newModule = std::move(modOrErr.get()); @@ -356,6 +350,7 @@ static llvm::Module* jitCompile(const std::string& String) } } else { newModule = parseAssemblyString(asmcode, pa, llvm::getGlobalContext()); + shouldPrepend = true; } if (newModule) { if (unlikely(!extemp::UNIV::ARCH.empty())) { From 63e0feaf202c12c4bef6e7dc9ff873436d846945 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:37:50 -0700 Subject: [PATCH 10/16] small opinionated refactoring + formatting --- src/SchemeFFI.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 8f9b2d904..f162fb29e 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -230,6 +230,16 @@ static std::string IRToBitcode(const std::string &ir) { return bitcode; } +static std::unique_ptr parseBitcodeFile(const std::string &sInlineBitcode) { + llvm::ErrorOr> maybe(llvm::parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), llvm::getGlobalContext())); + + if (maybe) { + return std::move(maybe.get()); + } else { + return nullptr; + } +} + // match @symbols @like @this_123 static const std::regex sGlobalSymRegex( "[ \t]@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", @@ -339,12 +349,15 @@ static llvm::Module* jitCompile(const std::string& String) // declarations to it. 
static bool shouldPrepend(false); if (shouldPrepend) { - auto modOrErr(parseBitcodeFile(llvm::MemoryBufferRef(sInlineBitcode, ""), llvm::getGlobalContext())); - if (likely(modOrErr)) { - newModule = std::move(modOrErr.get()); + std::unique_ptr mod(parseBitcodeFile(sInlineBitcode)); + if (likely(mod)) { + newModule = std::move(mod); asmcode = inlineDotLLString() + declarations + asmcode; if (parseAssemblyInto(llvm::MemoryBufferRef(asmcode, ""), *newModule, pa)) { - std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; + std::cout << "**** DECL ****\n" + << declarations + << "**** ENDDECL ****\n" + << std::endl; newModule.reset(); } } From 32f2beb895d5923ce91c85ec81d6346d675d92a5 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:41:56 -0700 Subject: [PATCH 11/16] isThisInitDotLL is a pretty precise name --- src/SchemeFFI.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index f162fb29e..64178b58b 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -284,9 +284,11 @@ static std::unordered_set loadInlineSyms() { static std::string globalDeclarations(const std::string &asmcode) { + // Contains @all @symbols from bitcode.ll and inline.ll static std::unordered_set sInlineSyms(loadInlineSyms()); + std::vector symbols; - // Copy all @symbols @like @this into symbols + // Copy all @symbols @like @this into symbols from asmcode insertMatchingSymbols(asmcode, sGlobalSymRegex, symbols); std::sort(symbols.begin(), symbols.end()); auto end(std::unique(symbols.begin(), symbols.end())); @@ -294,9 +296,7 @@ globalDeclarations(const std::string &asmcode) { std::unordered_set ignoreSyms; insertMatchingSymbols(asmcode, sDefineSymRegex, ignoreSyms); - std::stringstream ss; - // Iterating over all @symbols @in @asmcode matching sGlobalSymRegex for (auto iter = symbols.begin(); iter != end; ++iter) { const char* sym(iter->c_str()); @@ -345,10 
+345,10 @@ static llvm::Module* jitCompile(const std::string& String) // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; - // The first file we compile is init.ll, and we don't want to prepend inline.ll, or any global - // declarations to it. - static bool shouldPrepend(false); - if (shouldPrepend) { + // The first file we compile is init.ll, and we don't want to prepend inline.ll, + // or any global declarations to it. + static bool isThisInitDotLL(true); + if (!isThisInitDotLL) { std::unique_ptr mod(parseBitcodeFile(sInlineBitcode)); if (likely(mod)) { newModule = std::move(mod); @@ -361,10 +361,13 @@ static llvm::Module* jitCompile(const std::string& String) newModule.reset(); } } - } else { + } + + if (isThisInitDotLL) { newModule = parseAssemblyString(asmcode, pa, llvm::getGlobalContext()); - shouldPrepend = true; + isThisInitDotLL = false; } + if (newModule) { if (unlikely(!extemp::UNIV::ARCH.empty())) { newModule->setTargetTriple(extemp::UNIV::ARCH); From 0acbf99f3b6e9409df8cc6ceccd55ee2558a5065 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:48:27 -0700 Subject: [PATCH 12/16] remove one copy, lean on LLVM more for printing errors --- src/SchemeFFI.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 64178b58b..705f1d898 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -332,11 +332,8 @@ globalDeclarations(const std::string &asmcode) { return ss.str(); } -static llvm::Module* jitCompile(const std::string& String) +static llvm::Module* jitCompile(const std::string& asmcode) { - std::string asmcode(String); - llvm::SMDiagnostic pa; - static std::string sInlineBitcode(IRToBitcode(bitcodeDotLLString())); // Create some module to put our function into it. 
@@ -348,12 +345,14 @@ static llvm::Module* jitCompile(const std::string& String) // The first file we compile is init.ll, and we don't want to prepend inline.ll, // or any global declarations to it. static bool isThisInitDotLL(true); + + llvm::SMDiagnostic pa; if (!isThisInitDotLL) { std::unique_ptr mod(parseBitcodeFile(sInlineBitcode)); if (likely(mod)) { newModule = std::move(mod); - asmcode = inlineDotLLString() + declarations + asmcode; - if (parseAssemblyInto(llvm::MemoryBufferRef(asmcode, ""), *newModule, pa)) { + const std::string code = inlineDotLLString() + declarations + asmcode; + if (parseAssemblyInto(llvm::MemoryBufferRef(code, ""), *newModule, pa)) { std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" @@ -383,12 +382,11 @@ static llvm::Module* jitCompile(const std::string& String) { // std::cout << "**** CODE ****\n" << asmcode << " **** ENDCODE ****" << std::endl; // std::cout << pa.getMessage().str() << std::endl << pa.getLineNo() << std::endl; - std::string errstr; - llvm::raw_string_ostream ss(errstr); - pa.print("LLVM IR",ss); - printf("%s\n",ss.str().c_str()); + pa.print("LLVM IR", llvm::outs()); return nullptr; - } else if (extemp::EXTLLVM::VERIFY_COMPILES && verifyModule(*newModule)) { + } + + if (extemp::EXTLLVM::VERIFY_COMPILES && verifyModule(*newModule)) { std::cout << "\nInvalid LLVM IR\n"; return nullptr; } From 73ae5988fbd5c10e5501e807aaf9df442757011c Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:51:04 -0700 Subject: [PATCH 13/16] oh and bitcode.ll too! 
update this comment --- src/SchemeFFI.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 705f1d898..6cb8fc932 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -342,8 +342,8 @@ static llvm::Module* jitCompile(const std::string& asmcode) // std::cout << "**** DECL ****\n" << declarations << "**** ENDDECL ****\n" << std::endl; - // The first file we compile is init.ll, and we don't want to prepend inline.ll, - // or any global declarations to it. + // The first file we compile is init.ll, and we don't want to prepend bitcode.ll, + // inline.ll, or any global declarations to it. static bool isThisInitDotLL(true); llvm::SMDiagnostic pa; From 1247c14eb459790d77b4e5bc824f49a1eaf03802 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 20:51:35 -0700 Subject: [PATCH 14/16] add a clang-format file --- .clang-format | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..39a810d2a --- /dev/null +++ b/.clang-format @@ -0,0 +1,6 @@ +--- +Language: Cpp +BasedOnStyle: LLVM +IndentWidth: 4 +... + From 96a43530e50740241ee8a7e0cc58ee550f1e1ed5 Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Tue, 27 Apr 2021 23:53:12 -0700 Subject: [PATCH 15/16] remove template + sorting, seems unnecessary? 
--- src/SchemeFFI.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 6cb8fc932..876543c8d 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -250,12 +250,9 @@ static const std::regex sDefineSymRegex( "define[^\\n]+@([-a-zA-Z$._][-a-zA-Z$._0-9]*)", std::regex::optimize | std::regex::ECMAScript); -// template is temporary, we'll remove this once the refactoring is done -template static void insertMatchingSymbols( const std::string &code, const std::regex &regex, - // std::unordered_set &containingSet - T &containingSet) + std::unordered_set &containingSet) { std::copy(std::sregex_token_iterator(code.begin(), code.end(), regex, 1), std::sregex_token_iterator(), @@ -287,19 +284,17 @@ globalDeclarations(const std::string &asmcode) { // Contains @all @symbols from bitcode.ll and inline.ll static std::unordered_set sInlineSyms(loadInlineSyms()); - std::vector symbols; + std::unordered_set symbols; // Copy all @symbols @like @this into symbols from asmcode insertMatchingSymbols(asmcode, sGlobalSymRegex, symbols); - std::sort(symbols.begin(), symbols.end()); - auto end(std::unique(symbols.begin(), symbols.end())); std::unordered_set ignoreSyms; insertMatchingSymbols(asmcode, sDefineSymRegex, ignoreSyms); std::stringstream ss; // Iterating over all @symbols @in @asmcode matching sGlobalSymRegex - for (auto iter = symbols.begin(); iter != end; ++iter) { - const char* sym(iter->c_str()); + for (auto sym_s : symbols) { + const char* sym(sym_s.c_str()); if (sInlineSyms.find(sym) != sInlineSyms.end() || ignoreSyms.find(sym) != ignoreSyms.end()) { continue; } From 98d0d0c2a262283b3d5315103f36ed359b7ccf0d Mon Sep 17 00:00:00 2001 From: Nicholas Donaldson Date: Mon, 31 May 2021 16:10:15 -0700 Subject: [PATCH 16/16] use EXT_DYLIB, and lift related #ifdef/#endif from _WIN32 --- include/SchemeProcess.h | 2 +- src/SchemeFFI.cpp | 6 +++--- src/SchemeProcess.cpp | 14 +++++++------- 3 files changed, 11 
insertions(+), 11 deletions(-) diff --git a/include/SchemeProcess.h b/include/SchemeProcess.h index bf2cc91c0..73ca7dd78 100644 --- a/include/SchemeProcess.h +++ b/include/SchemeProcess.h @@ -138,7 +138,7 @@ class SchemeProcess { } bool loadFile(const std::string& File, const std::string& Path = std::string()); bool loadString(const std::string& str); -#ifdef DYLIB +#ifdef EXT_DYLIB bool loadFileAsString(char* fname); void findAndReplaceAll(std::string &data, std::string toSearch, std::string replaceStr); #endif diff --git a/src/SchemeFFI.cpp b/src/SchemeFFI.cpp index 876543c8d..8e4081abf 100644 --- a/src/SchemeFFI.cpp +++ b/src/SchemeFFI.cpp @@ -82,7 +82,7 @@ #include #endif -#ifdef DYLIB +#ifdef EXT_DYLIB #include CMRC_DECLARE(xtm); #endif @@ -192,7 +192,7 @@ static std::string fileToString(const std::string &fileName) { } static const std::string inlineDotLLString() { -#ifdef DYLIB +#ifdef EXT_DYLIB auto fs = cmrc::xtm::get_filesystem(); auto data = fs.open("runtime/inline.ll"); static const std::string sInlineDotLLString(data.begin(), data.end()); @@ -205,7 +205,7 @@ static const std::string inlineDotLLString() { } static const std::string bitcodeDotLLString() { -#ifdef DYLIB +#ifdef EXT_DYLIB auto fs = cmrc::xtm::get_filesystem(); auto data = fs.open("runtime/bitcode.ll"); static const std::string sBitcodeDotLLString(data.begin(), data.end()); diff --git a/src/SchemeProcess.cpp b/src/SchemeProcess.cpp index de4b253f7..b49250e71 100644 --- a/src/SchemeProcess.cpp +++ b/src/SchemeProcess.cpp @@ -46,14 +46,14 @@ #include #include -#ifdef _WIN32 -#include - #ifdef EXT_DYLIB #include CMRC_DECLARE(xtm); #endif +#ifdef _WIN32 +#include + static void usleep(LONGLONG Us) { auto timer(CreateWaitableTimer(NULL, TRUE, NULL)); @@ -134,7 +134,7 @@ SchemeProcess::SchemeProcess(const std::string& LoadPath, const std::string& Nam memset(m_schemeOutportString, 0, SCHEME_OUTPORT_STRING_LENGTH); scheme_set_output_port_string(m_scheme, m_schemeOutportString, m_schemeOutportString 
+ SCHEME_OUTPORT_STRING_LENGTH - 1); -#ifdef DYLIB +#ifdef EXT_DYLIB auto fs = cmrc::xtm::get_filesystem(); auto data = fs.open("runtime/init.xtm"); std::string fstr = std::string(data.begin(), data.end()); @@ -244,7 +244,7 @@ bool SchemeProcess::loadString(const std::string& str) { return true; } -#ifdef DYLIB +#ifdef EXT_DYLIB bool SchemeProcess::loadFileAsString(char* fname) { auto fs = cmrc::xtm::get_filesystem(); @@ -280,7 +280,7 @@ void* SchemeProcess::taskImpl() #endif while(!m_running) { } -#ifdef DYLIB +#ifdef EXT_DYLIB loadFileAsString("runtime/scheme.xtm"); loadFileAsString("runtime/llvmti.xtm"); loadFileAsString("runtime/llvmir.xtm"); @@ -298,7 +298,7 @@ void* SchemeProcess::taskImpl() // only load extempore.xtm in primary process if (m_name == "primary") { EXTMonitor::ScopedLock lock(m_guard); -#ifdef DYLIB +#ifdef EXT_DYLIB auto fs = cmrc::xtm::get_filesystem(); auto data = fs.open("runtime/init.ll"); std::string fstr = std::string(data.begin(), data.end());