From 154650e6f6b969af9be3f65cd05f3ade621fa558 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 7 Feb 2025 07:16:20 -0800 Subject: [PATCH 1/3] [SYCL][NFCI] Restore commit ccd73ee Commit > ccd73eeab34b31c7c38e9aca05ca4192fb0913b0 > [LinkerWrapper] Remove in-house handling of LTO (llvm/llvm-project#113715) Removed a lot of dead code from `clang-linker-wrapper`. However, it has been preserved in our fork by: > d7c2a0fe4dea71e469fc651a87e549f4fd363202 > Merge from 'main' to 'sycl-web' (69 commits) Due to the way we have handled a divergence from the upstream it isn't really clear if something is our code or not, so it wasn't that hard to make a conflict resolution mistake. This commit removes all that dead code to make it closer to the upstream. --- .../ClangLinkerWrapper.cpp | 354 +----------------- 1 file changed, 3 insertions(+), 351 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 735ecb55fc80c..df1aeb3e69dd8 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -147,9 +147,6 @@ static std::list> TempFiles; /// Codegen flags for LTO backend. static codegen::RegisterCodeGenFlags CodeGenFlags; -/// Global flag to indicate that the LTO pipeline threw an error. -static std::atomic LTOError; - static std::optional SYCLModuleSplitMode; static bool UseSYCLPostLinkTool; @@ -1706,338 +1703,6 @@ void diagnosticHandler(const DiagnosticInfo &DI) { } } -// Get the list of target features from the input file and unify them such that -// if there are multiple +xxx or -xxx features we only keep the last one. -std::vector getTargetFeatures(ArrayRef InputFiles) { - SmallVector Features; - for (const OffloadFile &File : InputFiles) { - for (auto &Arg : llvm::split(File.getBinary()->getString("feature"), ",")) - Features.emplace_back(Arg); - } - - // Only add a feature if it hasn't been seen before starting from the end. - std::vector UnifiedFeatures; - DenseSet UsedFeatures; - for (StringRef Feature : llvm::reverse(Features)) { - if (UsedFeatures.insert(Feature.drop_front()).second) - UnifiedFeatures.push_back(Feature.str()); - } - - return UnifiedFeatures; -} - -template > -std::unique_ptr createLTO( - const ArgList &Args, const std::vector &Features, - ModuleHook Hook = [](size_t, const Module &) { return true; }) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - // We need to remove AMD's target-id from the processor if present. - StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ); - StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID); - lto::Config Conf; - lto::ThinBackend Backend; - // TODO: Handle index-only thin-LTO - Backend = - lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); - - Conf.CPU = Arch.str(); - Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple); - - Conf.RemarksFilename = RemarksFilename; - Conf.RemarksPasses = RemarksPasses; - Conf.RemarksWithHotness = RemarksWithHotness; - Conf.RemarksHotnessThreshold = RemarksHotnessThreshold; - Conf.RemarksFormat = RemarksFormat; - - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - Conf.MAttrs = Features; - std::optional CGOptLevelOrNone = - CodeGenOpt::parseLevel(OptLevel[1]); - assert(CGOptLevelOrNone && "Invalid optimization level"); - Conf.CGOptLevel = *CGOptLevelOrNone; - Conf.OptLevel = OptLevel[1] - '0'; - Conf.DefaultTriple = Triple.getTriple(); - - // TODO: Should we complain about combining --opt-level and -passes, as opt - // does? That might be too limiting in clang-linker-wrapper, so for now we - // just warn in the help entry for -passes that the default corresponding - // to --opt-level=O? should be included there. The problem is that - // --opt-level produces effects in clang-linker-wrapper beyond what -passes - // appears to be able to achieve, so rejecting the combination of --opt-level - // and -passes would apparently make it impossible to combine those effects - // with a custom pass pipeline. - Conf.OptPipeline = PassPipeline; - Conf.PassPlugins = PassPlugins; - - LTOError = false; - Conf.DiagHandler = diagnosticHandler; - - Conf.PTO.LoopVectorization = Conf.OptLevel > 1; - Conf.PTO.SLPVectorization = Conf.OptLevel > 1; - - if (SaveTemps) { - std::string TempName = (sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + TargetID) - .str(); - Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postlink.bc" - : TempName + "." + std::to_string(Task) + ".postlink.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postopt.bc" - : TempName + "." + std::to_string(Task) + ".postopt.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - } - Conf.PostOptModuleHook = Hook; - Conf.CGFileType = (Triple.isNVPTX() || SaveTemps) - ? CodeGenFileType::AssemblyFile - : CodeGenFileType::ObjectFile; - - // TODO: Handle remark files - Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); - - return std::make_unique(std::move(Conf), Backend); -} - -// Returns true if \p S is valid as a C language identifier and will be given -// `__start_` and `__stop_` symbols. -bool isValidCIdentifier(StringRef S) { - return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - llvm::all_of(llvm::drop_begin(S), - [](char C) { return C == '_' || isAlnum(C); }); -} - -Error linkBitcodeFiles(SmallVectorImpl &InputFiles, - SmallVectorImpl &OutputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Link bitcode files"); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - // Early exit for SPIR targets - if (Triple.isSPIROrSPIRV()) - return Error::success(); - - SmallVector BitcodeInputFiles; - DenseSet StrongResolutions; - DenseSet UsedInRegularObj; - DenseSet UsedInSharedLib; - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - // Search for bitcode files in the input and create an LTO input file. If - // it is not a bitcode file, scan its symbol table for symbols we need to - // save. - for (OffloadFile &File : InputFiles) { - MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), ""); - - file_magic Type = identify_magic(Buffer.getBuffer()); - switch (Type) { - case file_magic::bitcode: { - Expected IRSymtabOrErr = readIRSymtab(Buffer); - if (!IRSymtabOrErr) - return IRSymtabOrErr.takeError(); - - // Check for any strong resolutions we need to preserve. - for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { - for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { - if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() && - !Sym.isUndefined()) - StrongResolutions.insert(Saver.save(Sym.Name)); - } - } - BitcodeInputFiles.emplace_back(std::move(File)); - continue; - } - case file_magic::elf_relocatable: - case file_magic::elf_shared_object: { - Expected> ObjFile = - ObjectFile::createObjectFile(Buffer); - if (!ObjFile) - continue; - - for (SymbolRef Sym : (*ObjFile)->symbols()) { - Expected Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Record if we've seen these symbols in any object or shared - // libraries. - if ((*ObjFile)->isRelocatableObject()) - UsedInRegularObj.insert(Saver.save(*Name)); - else - UsedInSharedLib.insert(Saver.save(*Name)); - } - continue; - } - default: - continue; - } - } - - if (BitcodeInputFiles.empty()) - return Error::success(); - - // Remove all the bitcode files that we moved from the original input. - llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); - - // LTO Module hook to output bitcode without running the backend. - SmallVector BitcodeOutput; - auto OutputBitcode = [&](size_t, const Module &M) { - auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + - "-jit-" + Triple.getTriple(), - "bc"); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - - std::error_code EC; - raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - BitcodeOutput.push_back(*TempFileOrErr); - return false; - }; - - // We assume visibility of the whole program if every input file was - // bitcode. - auto Features = getTargetFeatures(BitcodeInputFiles); - auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || - Args.hasArg(OPT_builtin_bitcode_EQ) || - Args.hasArg(OPT_clang_backend) - ? createLTO(Args, Features, OutputBitcode) - : createLTO(Args, Features); - - // We need to resolve the symbols so the LTO backend knows which symbols - // need to be kept or can be internalized. This is a simplified symbol - // resolution scheme to approximate the full resolution a linker would do. - uint64_t Idx = 0; - DenseSet PrevailingSymbols; - for (auto &BitcodeInput : BitcodeInputFiles) { - // Get a semi-unique buffer identifier for Thin-LTO. - StringRef Identifier = Saver.save( - std::to_string(Idx++) + "." + - BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier()); - MemoryBufferRef Buffer = - MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier); - Expected> BitcodeFileOrErr = - llvm::lto::InputFile::create(Buffer); - if (!BitcodeFileOrErr) - return BitcodeFileOrErr.takeError(); - - // Save the input file and the buffer associated with its memory. - const auto Symbols = (*BitcodeFileOrErr)->symbols(); - SmallVector Resolutions(Symbols.size()); - size_t Idx = 0; - for (auto &Sym : Symbols) { - lto::SymbolResolution &Res = Resolutions[Idx++]; - - // We will use this as the prevailing symbol definition in LTO unless - // it is undefined or another definition has already been used. - Res.Prevailing = - !Sym.isUndefined() && - !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) && - PrevailingSymbols.insert(Saver.save(Sym.getName())).second; - - // We need LTO to preseve the following global symbols: - // 1) Symbols used in regular objects. - // 2) Sections that will be given a __start/__stop symbol. - // 3) Prevailing symbols that are needed visible to external - // libraries. - Res.VisibleToRegularObj = - UsedInRegularObj.contains(Sym.getName()) || - isValidCIdentifier(Sym.getSectionName()) || - (Res.Prevailing && - (Sym.getVisibility() != GlobalValue::HiddenVisibility && - !Sym.canBeOmittedFromSymbolTable())); - - // Identify symbols that must be exported dynamically and can be - // referenced by other files. - Res.ExportDynamic = - Sym.getVisibility() != GlobalValue::HiddenVisibility && - (UsedInSharedLib.contains(Sym.getName()) || - !Sym.canBeOmittedFromSymbolTable()); - - // The final definition will reside in this linkage unit if the symbol - // is defined and local to the module. This only checks for bitcode - // files, full assertion will require complete symbol resolution. - Res.FinalDefinitionInLinkageUnit = - Sym.getVisibility() != GlobalValue::DefaultVisibility && - (!Sym.isUndefined() && !Sym.isCommon()); - - // We do not support linker redefined symbols (e.g. --wrap) for device - // image linking, so the symbols will not be changed after LTO. - Res.LinkerRedefined = false; - } - - // Add the bitcode file with its resolved symbols to the LTO job. - if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) - return Err; - } - - // Run the LTO job to compile the bitcode. - size_t MaxTasks = LTOBackend->getMaxTasks(); - SmallVector Files(MaxTasks); - auto AddStream = - [&](size_t Task, - const Twine &ModuleName) -> std::unique_ptr { - int FD = -1; - auto &TempFile = Files[Task]; - StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o"; - std::string TaskStr = Task ? "." + std::to_string(Task) : ""; - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + Arch + TaskStr, - Extension); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - TempFile = *TempFileOrErr; - if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD)) - reportError(errorCodeToError(EC)); - return std::make_unique( - std::make_unique(FD, true)); - }; - - if (Error Err = LTOBackend->run(AddStream)) - return Err; - - if (LTOError) - return createStringError("Errors encountered inside the LTO pipeline."); - - // If we are embedding bitcode we only need the intermediate output. - bool SingleOutput = Files.size() == 1; - if (Args.hasArg(OPT_embed_bitcode)) { - if (BitcodeOutput.size() != 1 || !SingleOutput) - return createStringError("Cannot embed bitcode with multiple files."); - OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front())); - return Error::success(); - } - - // Append the new inputs to the device linker input. If the user requested - // an internalizing link we need to pass the bitcode to clang. - for (StringRef File : - Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) - ? BitcodeOutput - : Files) - OutputFiles.push_back(File); - - return Error::success(); -} - // Compile the module to an object file using the appropriate target machine for // the host triple. Expected compileModule(Module &M, OffloadKind Kind) { @@ -2370,15 +2035,8 @@ Expected> linkAndWrapDeviceFiles( return FileNameOrErr.takeError(); InputFiles.emplace_back(*FileNameOrErr); } + if (HasSYCLOffloadKind) { - SmallVector InputFiles; - // Write device inputs to an output file for the linker. - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } // Link the input device files using the device linker for SYCL // offload. auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs); @@ -2458,12 +2116,8 @@ Expected> linkAndWrapDeviceFiles( std::scoped_lock Guard(ImageMtx); WrappedOutput.push_back(*OutputFile); } - if (HasNonSYCLOffloadKinds) { - // First link and remove all the input files containing bitcode. - SmallVector InputFiles; - if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) - return Err; + if (HasNonSYCLOffloadKinds) { // Write any remaining device inputs to an output file for the linker. for (const OffloadFile &File : Input) { auto FileNameOrErr = writeOffloadFile(File); @@ -2473,9 +2127,7 @@ Expected> linkAndWrapDeviceFiles( } // Link the remaining device files using the device linker. - auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode) - ? linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) return OutputOrErr.takeError(); // Store the offloading image for each linked output file. From 3f535e56bf93dca2aaecf5ffcb35797ab6186cb8 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 7 Feb 2025 07:33:03 -0800 Subject: [PATCH 2/3] [SYCL][NFCI] Unify our customizations with the community We seem to have put all the community code into a separate branch which made it hard to maintain (i.e. keep in sync with the upstream). This commit outlines some common parts of downstream and upstream code to simplify feature maintanence (by making the code closer to the upstream overall). --- .../ClangLinkerWrapper.cpp | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index df1aeb3e69dd8..6443244a2e1bc 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -2036,14 +2036,16 @@ Expected> linkAndWrapDeviceFiles( InputFiles.emplace_back(*FileNameOrErr); } + // Link the remaining device files using the device linker. + Expected OutputOrErr = + HasSYCLOffloadKind ? sycl::linkDevice(InputFiles, LinkerArgs) + : linkDevice(InputFiles, LinkerArgs); + if (!OutputOrErr) + return OutputOrErr.takeError(); + if (HasSYCLOffloadKind) { - // Link the input device files using the device linker for SYCL - // offload. - auto TmpOutputOrErr = sycl::linkDevice(InputFiles, LinkerArgs); - if (!TmpOutputOrErr) - return TmpOutputOrErr.takeError(); SmallVector InputFilesSYCL; - InputFilesSYCL.emplace_back(*TmpOutputOrErr); + InputFilesSYCL.emplace_back(*OutputOrErr); auto SplitModulesOrErr = UseSYCLPostLinkTool ? sycl::runSYCLPostLinkTool(InputFilesSYCL, LinkerArgs) @@ -2117,19 +2119,9 @@ Expected> linkAndWrapDeviceFiles( WrappedOutput.push_back(*OutputFile); } + // Note: body of the 'if' below is supposed to match 1:1 (or at least very + // close) to the code in the upstream. if (HasNonSYCLOffloadKinds) { - // Write any remaining device inputs to an output file for the linker. - for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); - if (!FileNameOrErr) - return FileNameOrErr.takeError(); - InputFiles.emplace_back(*FileNameOrErr); - } - - // Link the remaining device files using the device linker. - auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); - if (!OutputOrErr) - return OutputOrErr.takeError(); // Store the offloading image for each linked output file. for (OffloadKind Kind : ActiveOffloadKinds) { llvm::ErrorOr> FileOrErr = From a1644f1fc1ae3a12f77be7978f8d6afd97e9e742 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 7 Feb 2025 07:36:45 -0800 Subject: [PATCH 3/3] [SYCL][NFC] Restore 4a96081 Commit > 4a96081224b6c0f166760eab0c42ef3dfadd5ed1 > [clang-linker-wrapper] Fix a warning Seems to be accidentally reverted by > d7c2a0fe4dea71e469fc651a87e549f4fd363202 > Merge from 'main' to 'sycl-web' (69 commits) --- .../ClangLinkerWrapper.cpp | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 6443244a2e1bc..95c4f76818ed0 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1681,28 +1681,6 @@ Expected linkDevice(ArrayRef InputFiles, } } -void diagnosticHandler(const DiagnosticInfo &DI) { - std::string ErrStorage; - raw_string_ostream OS(ErrStorage); - DiagnosticPrinterRawOStream DP(OS); - DI.print(DP); - - switch (DI.getSeverity()) { - case DS_Error: - WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Warning: - WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Note: - WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Remark: - WithColor::remark(errs()) << ErrStorage << "\n"; - break; - } -} - // Compile the module to an object file using the appropriate target machine for // the host triple. Expected compileModule(Module &M, OffloadKind Kind) {