diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 44331b90e7d93..cb5c7c137a0ba 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -161,7 +161,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED -// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}} {{.*}}.o +// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o // LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -185,7 +185,7 @@ int bar() { return weak; } // RUN: --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE -// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}} {{.*}}.o +// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o // LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 0cd85c8470afa..5b3f56b4799fe 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -48,7 +48,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --save-temps -O2 \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -foffload-lto -Wl,--no-undefined {{.*}} -save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -O2 -foffload-lto -Wl,--no-undefined {{.*}}.o -save-temps // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 5fe6f551e02f7..8a44099b34e87 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -147,9 +147,6 @@ static std::list> TempFiles; /// Codegen flags for LTO backend. static codegen::RegisterCodeGenFlags CodeGenFlags; -/// Global flag to indicate that the LTO pipeline threw an error. -static std::atomic LTOError; - static std::optional SYCLModuleSplitMode; static bool UseSYCLPostLinkTool; @@ -285,15 +282,17 @@ Expected createOutputFile(const Twine &Prefix, StringRef Extension) { return TempFiles.back(); } -Expected writeOffloadFile(const OffloadFile &File) { +// TODO: Remove HasSYCLOffloadKind dependence when aligning with community code. +Expected writeOffloadFile(const OffloadFile &File, + bool HasSYCLOffloadKind = false) { const OffloadBinary &Binary = *File.getBinary(); StringRef Prefix = sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); - StringRef Suffix = getImageKindName(Binary.getImageKind()); auto TempFileOrErr = createOutputFile( - Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Suffix); + Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), + HasSYCLOffloadKind ? getImageKindName(Binary.getImageKind()) : "o"); if (!TempFileOrErr) return TempFileOrErr.takeError(); @@ -1307,7 +1306,7 @@ static Expected linkDevice(ArrayRef InputFiles, for (auto &Binary : Binaries) { auto BinTriple = Binary.getBinary()->getTriple(); if (BinTriple == Triple.getTriple()) { - auto FileNameOrErr = writeOffloadFile(Binary); + auto FileNameOrErr = writeOffloadFile(Binary, true /* SYCLOffload */); if (!FileNameOrErr) return FileNameOrErr.takeError(); ExtractedDeviceLibFiles.emplace_back(*FileNameOrErr); @@ -1684,360 +1683,6 @@ Expected linkDevice(ArrayRef InputFiles, } } -void diagnosticHandler(const DiagnosticInfo &DI) { - std::string ErrStorage; - raw_string_ostream OS(ErrStorage); - DiagnosticPrinterRawOStream DP(OS); - DI.print(DP); - - switch (DI.getSeverity()) { - case DS_Error: - WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Warning: - WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Note: - WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Remark: - WithColor::remark(errs()) << ErrStorage << "\n"; - break; - } -} - -// Get the list of target features from the input file and unify them such that -// if there are multiple +xxx or -xxx features we only keep the last one. -std::vector getTargetFeatures(ArrayRef InputFiles) { - SmallVector Features; - for (const OffloadFile &File : InputFiles) { - for (auto &Arg : llvm::split(File.getBinary()->getString("feature"), ",")) - Features.emplace_back(Arg); - } - - // Only add a feature if it hasn't been seen before starting from the end. - std::vector UnifiedFeatures; - DenseSet UsedFeatures; - for (StringRef Feature : llvm::reverse(Features)) { - if (UsedFeatures.insert(Feature.drop_front()).second) - UnifiedFeatures.push_back(Feature.str()); - } - - return UnifiedFeatures; -} - -template > -std::unique_ptr createLTO( - const ArgList &Args, const std::vector &Features, - ModuleHook Hook = [](size_t, const Module &) { return true; }) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - // We need to remove AMD's target-id from the processor if present. - StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ); - StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID); - lto::Config Conf; - lto::ThinBackend Backend; - // TODO: Handle index-only thin-LTO - Backend = - lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); - - Conf.CPU = Arch.str(); - Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple); - - Conf.RemarksFilename = RemarksFilename; - Conf.RemarksPasses = RemarksPasses; - Conf.RemarksWithHotness = RemarksWithHotness; - Conf.RemarksHotnessThreshold = RemarksHotnessThreshold; - Conf.RemarksFormat = RemarksFormat; - - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - Conf.MAttrs = Features; - std::optional CGOptLevelOrNone = - CodeGenOpt::parseLevel(OptLevel[1]); - assert(CGOptLevelOrNone && "Invalid optimization level"); - Conf.CGOptLevel = *CGOptLevelOrNone; - Conf.OptLevel = OptLevel[1] - '0'; - Conf.DefaultTriple = Triple.getTriple(); - - // TODO: Should we complain about combining --opt-level and -passes, as opt - // does? That might be too limiting in clang-linker-wrapper, so for now we - // just warn in the help entry for -passes that the default corresponding - // to --opt-level=O? should be included there. The problem is that - // --opt-level produces effects in clang-linker-wrapper beyond what -passes - // appears to be able to achieve, so rejecting the combination of --opt-level - // and -passes would apparently make it impossible to combine those effects - // with a custom pass pipeline. - Conf.OptPipeline = PassPipeline; - Conf.PassPlugins = PassPlugins; - - LTOError = false; - Conf.DiagHandler = diagnosticHandler; - - Conf.PTO.LoopVectorization = Conf.OptLevel > 1; - Conf.PTO.SLPVectorization = Conf.OptLevel > 1; - - if (SaveTemps) { - std::string TempName = (sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + TargetID) - .str(); - Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postlink.bc" - : TempName + "." + std::to_string(Task) + ".postlink.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postopt.bc" - : TempName + "." + std::to_string(Task) + ".postopt.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - } - Conf.PostOptModuleHook = Hook; - Conf.CGFileType = (Triple.isNVPTX() || SaveTemps) - ? CodeGenFileType::AssemblyFile - : CodeGenFileType::ObjectFile; - - // TODO: Handle remark files - Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); - - return std::make_unique(std::move(Conf), Backend); -} - -// Returns true if \p S is valid as a C language identifier and will be given -// `__start_` and `__stop_` symbols. -bool isValidCIdentifier(StringRef S) { - return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - llvm::all_of(llvm::drop_begin(S), - [](char C) { return C == '_' || isAlnum(C); }); -} - -Error linkBitcodeFiles(SmallVectorImpl &InputFiles, - SmallVectorImpl &OutputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Link bitcode files"); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - // Early exit for SPIR targets - if (Triple.isSPIROrSPIRV()) - return Error::success(); - - SmallVector BitcodeInputFiles; - DenseSet StrongResolutions; - DenseSet UsedInRegularObj; - DenseSet UsedInSharedLib; - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - // Search for bitcode files in the input and create an LTO input file. If - // it is not a bitcode file, scan its symbol table for symbols we need to - // save. - for (OffloadFile &File : InputFiles) { - MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), ""); - - file_magic Type = identify_magic(Buffer.getBuffer()); - switch (Type) { - case file_magic::bitcode: { - Expected IRSymtabOrErr = readIRSymtab(Buffer); - if (!IRSymtabOrErr) - return IRSymtabOrErr.takeError(); - - // Check for any strong resolutions we need to preserve. - for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { - for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { - if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() && - !Sym.isUndefined()) - StrongResolutions.insert(Saver.save(Sym.Name)); - } - } - BitcodeInputFiles.emplace_back(std::move(File)); - continue; - } - case file_magic::elf_relocatable: - case file_magic::elf_shared_object: { - Expected> ObjFile = - ObjectFile::createObjectFile(Buffer); - if (!ObjFile) - continue; - - for (SymbolRef Sym : (*ObjFile)->symbols()) { - Expected Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Record if we've seen these symbols in any object or shared - // libraries. - if ((*ObjFile)->isRelocatableObject()) - UsedInRegularObj.insert(Saver.save(*Name)); - else - UsedInSharedLib.insert(Saver.save(*Name)); - } - continue; - } - default: - continue; - } - } - - if (BitcodeInputFiles.empty()) - return Error::success(); - - // Remove all the bitcode files that we moved from the original input. - llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); - - // LTO Module hook to output bitcode without running the backend. - SmallVector BitcodeOutput; - auto OutputBitcode = [&](size_t, const Module &M) { - auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + - "-jit-" + Triple.getTriple(), - "bc"); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - - std::error_code EC; - raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - BitcodeOutput.push_back(*TempFileOrErr); - return false; - }; - - // We assume visibility of the whole program if every input file was - // bitcode. - auto Features = getTargetFeatures(BitcodeInputFiles); - auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || - Args.hasArg(OPT_builtin_bitcode_EQ) || - Args.hasArg(OPT_clang_backend) - ? createLTO(Args, Features, OutputBitcode) - : createLTO(Args, Features); - - // We need to resolve the symbols so the LTO backend knows which symbols - // need to be kept or can be internalized. This is a simplified symbol - // resolution scheme to approximate the full resolution a linker would do. - uint64_t Idx = 0; - DenseSet PrevailingSymbols; - for (auto &BitcodeInput : BitcodeInputFiles) { - // Get a semi-unique buffer identifier for Thin-LTO. - StringRef Identifier = Saver.save( - std::to_string(Idx++) + "." + - BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier()); - MemoryBufferRef Buffer = - MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier); - Expected> BitcodeFileOrErr = - llvm::lto::InputFile::create(Buffer); - if (!BitcodeFileOrErr) - return BitcodeFileOrErr.takeError(); - - // Save the input file and the buffer associated with its memory. - const auto Symbols = (*BitcodeFileOrErr)->symbols(); - SmallVector Resolutions(Symbols.size()); - size_t Idx = 0; - for (auto &Sym : Symbols) { - lto::SymbolResolution &Res = Resolutions[Idx++]; - - // We will use this as the prevailing symbol definition in LTO unless - // it is undefined or another definition has already been used. - Res.Prevailing = - !Sym.isUndefined() && - !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) && - PrevailingSymbols.insert(Saver.save(Sym.getName())).second; - - // We need LTO to preseve the following global symbols: - // 1) Symbols used in regular objects. - // 2) Sections that will be given a __start/__stop symbol. - // 3) Prevailing symbols that are needed visible to external - // libraries. - Res.VisibleToRegularObj = - UsedInRegularObj.contains(Sym.getName()) || - isValidCIdentifier(Sym.getSectionName()) || - (Res.Prevailing && - (Sym.getVisibility() != GlobalValue::HiddenVisibility && - !Sym.canBeOmittedFromSymbolTable())); - - // Identify symbols that must be exported dynamically and can be - // referenced by other files. - Res.ExportDynamic = - Sym.getVisibility() != GlobalValue::HiddenVisibility && - (UsedInSharedLib.contains(Sym.getName()) || - !Sym.canBeOmittedFromSymbolTable()); - - // The final definition will reside in this linkage unit if the symbol - // is defined and local to the module. This only checks for bitcode - // files, full assertion will require complete symbol resolution. - Res.FinalDefinitionInLinkageUnit = - Sym.getVisibility() != GlobalValue::DefaultVisibility && - (!Sym.isUndefined() && !Sym.isCommon()); - - // We do not support linker redefined symbols (e.g. --wrap) for device - // image linking, so the symbols will not be changed after LTO. - Res.LinkerRedefined = false; - } - - // Add the bitcode file with its resolved symbols to the LTO job. - if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) - return Err; - } - - // Run the LTO job to compile the bitcode. - size_t MaxTasks = LTOBackend->getMaxTasks(); - SmallVector Files(MaxTasks); - auto AddStream = - [&](size_t Task, - const Twine &ModuleName) -> std::unique_ptr { - int FD = -1; - auto &TempFile = Files[Task]; - StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o"; - std::string TaskStr = Task ? "." + std::to_string(Task) : ""; - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + Arch + TaskStr, - Extension); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - TempFile = *TempFileOrErr; - if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD)) - reportError(errorCodeToError(EC)); - return std::make_unique( - std::make_unique(FD, true)); - }; - - if (Error Err = LTOBackend->run(AddStream)) - return Err; - - if (LTOError) - return createStringError("Errors encountered inside the LTO pipeline."); - - // If we are embedding bitcode we only need the intermediate output. - bool SingleOutput = Files.size() == 1; - if (Args.hasArg(OPT_embed_bitcode)) { - if (BitcodeOutput.size() != 1 || !SingleOutput) - return createStringError("Cannot embed bitcode with multiple files."); - OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front())); - return Error::success(); - } - - // Append the new inputs to the device linker input. If the user requested - // an internalizing link we need to pass the bitcode to clang. - for (StringRef File : - Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) - ? BitcodeOutput - : Files) - OutputFiles.push_back(File); - - return Error::success(); -} - // Compile the module to an object file using the appropriate target machine for // the host triple. Expected compileModule(Module &M, OffloadKind Kind) { @@ -2366,7 +2011,7 @@ Expected> linkAndWrapDeviceFiles( SmallVector InputFiles; // Write device inputs to an output file for the linker. for (const OffloadFile &File : Input) { - auto FileNameOrErr = writeOffloadFile(File); + auto FileNameOrErr = writeOffloadFile(File, HasSYCLOffloadKind); if (!FileNameOrErr) return FileNameOrErr.takeError(); InputFiles.emplace_back(*FileNameOrErr); @@ -2451,12 +2096,8 @@ Expected> linkAndWrapDeviceFiles( WrappedOutput.push_back(*OutputFile); } if (HasNonSYCLOffloadKinds) { - // First link and remove all the input files containing bitcode. + // Write any remaining device inputs to an output file. SmallVector InputFiles; - if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) - return Err; - - // Write any remaining device inputs to an output file for the linker. for (const OffloadFile &File : Input) { auto FileNameOrErr = writeOffloadFile(File); if (!FileNameOrErr) @@ -2465,9 +2106,7 @@ Expected> linkAndWrapDeviceFiles( } // Link the remaining device files using the device linker. - auto OutputOrErr = !Args.hasArg(OPT_embed_bitcode) - ? linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) return OutputOrErr.takeError(); // Store the offloading image for each linked output file.