|
52 | 52 | #include "llvm/Transforms/ObjCARC.h" |
53 | 53 | #include "llvm/Transforms/Utils/FunctionImportUtils.h" |
54 | 54 |
|
| 55 | +#include <numeric> |
| 56 | + |
55 | 57 | using namespace llvm; |
56 | 58 |
|
57 | 59 | #define DEBUG_TYPE "thinlto" |
@@ -881,11 +883,24 @@ void ThinLTOCodeGenerator::run() { |
881 | 883 | for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) |
882 | 884 | ExportLists[DefinedGVSummaries.first()]; |
883 | 885 |
|
| 886 | + // Compute the ordering in which we will process the inputs: the rough |
| 887 | + // heuristic here is to sort them by size so that the largest modules get |
| 888 | + // scheduled as soon as possible. This is purely a compile-time optimization. |
| 889 | + std::vector<int> ModulesOrdering; |
| 890 | + ModulesOrdering.resize(Modules.size()); |
| 891 | + std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); |
| 892 | + std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), |
| 893 | + [&](int LeftIndex, int RightIndex) { |
| 894 | + auto LSize = Modules[LeftIndex].getBufferSize(); |
| 895 | + auto RSize = Modules[RightIndex].getBufferSize(); |
| 896 | + return LSize > RSize; |
| 897 | + }); |
| 898 | + |
884 | 899 | // Parallel optimizer + codegen |
885 | 900 | { |
886 | 901 | ThreadPool Pool(getNumCores()); |
887 | | - int count = 0; |
888 | | - for (auto &ModuleBuffer : Modules) { |
| 902 | + for (auto IndexCount : ModulesOrdering) { |
| 903 | + auto &ModuleBuffer = Modules[IndexCount]; |
889 | 904 | Pool.async([&](int count) { |
890 | 905 | auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier(); |
891 | 906 | auto &ExportList = ExportLists[ModuleIdentifier]; |
@@ -937,8 +952,7 @@ void ThinLTOCodeGenerator::run() { |
937 | 952 |
|
938 | 953 | OutputBuffer = CacheEntry.write(std::move(OutputBuffer)); |
939 | 954 | ProducedBinaries[count] = std::move(OutputBuffer); |
940 | | - }, count); |
941 | | - count++; |
| 955 | + }, IndexCount); |
942 | 956 | } |
943 | 957 | } |
944 | 958 |
|
|
0 commit comments