Skip to content

Commit 507c506

Browse files
committed
Update for comments:
* Remove the support for multiple spanning layers * Reorder renaming to blindly attempt a rename first, and only attempt to create the directory tree if that fails on the assumption that directory collisions are eventually going to be more likely than not.
1 parent 9b1d0db commit 507c506

File tree

1 file changed

+49
-38
lines changed

1 file changed

+49
-38
lines changed

llvm/lib/CAS/OnDiskGraphDB.cpp

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -596,17 +596,15 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) {
596596
assert(!Done);
597597
Done = true;
598598
// Always try to close and rename.
599-
std::error_code RenameEC = sys::fs::create_directories(
600-
sys::path::parent_path(Name.str())
601-
);
602-
603-
// We consider both the creation of the directory for the persistent storage
604-
// and the final rename itself to be a failure to rename. This is consistent
605-
// with other filesystem APIs that provide file manipulation APIs that will
606-
// automatically construct directories as needed, so it seems like
607-
// a reasonable choice.
608-
if (!RenameEC)
609-
RenameEC = sys::fs::rename(TmpName, Name);
599+
std::error_code RenameEC = sys::fs::rename(TmpName, Name);
600+
if (RenameEC) {
601+
// If the rename fails we assume we have not created all the required
602+
// intermediate directories, so we'll try to do that now, and then
603+
// retry the rename.
604+
RenameEC = sys::fs::create_directories(sys::path::parent_path(Name.str()));
605+
if (!RenameEC)
606+
RenameEC = sys::fs::rename(TmpName, Name);
607+
}
610608

611609
if (Logger)
612610
Logger->log_TempFile_keep(TmpName, Name.str(), RenameEC);
@@ -1269,44 +1267,57 @@ void OnDiskGraphDB::getStandalonePath(StringRef Suffix, const IndexProxy &I,
12691267
if (TempPath)
12701268
TempPath->assign(RootPath.begin(), RootPath.end());
12711269

1272-
static const unsigned IntermediateDirCount = 2;
1273-
static const unsigned EntropyPerIntermediateDir = 8;
1274-
static const unsigned CharactersPerDirName =
1275-
((EntropyPerIntermediateDir + 15) & ~7) / 8;
1276-
1277-
static const unsigned TotalEntropyBits =
1278-
IntermediateDirCount * EntropyPerIntermediateDir;
1279-
static const unsigned HashByteCount = ((TotalEntropyBits + 7) & ~7) / 8;
1270+
// 10 bits of entropy should be sufficient, we could go higher, but increasing
1271+
// this starts increasing the likelihood of collisions in filesystem hash
1272+
// structures. We could possibly go lower as well, but changing this would
1273+
// require a version bump, so 10 bits seems like a reasonable balance of
1274+
// number of fs entries vs path misses.
1275+
// If we find we need to increase the entropy, other implementations of
1276+
// directory fanning don't appear to use more than 16 bits of entropy in a
1277+
// single directory before starting to introduce additional fanning layers.
1278+
static const unsigned EntropyForIntermediateDir = 10;
1279+
static_assert(EntropyForIntermediateDir < 32,
1280+
"The logic below requires `EntropyForIntermediateDir` to be "
1281+
"less than 32 bits.");
1282+
1283+
// We're using hex, so we get 4 bits per character
1284+
static const unsigned CharactersForIntermediateDirName =
1285+
(EntropyForIntermediateDir + 3) / 4;
1286+
static const unsigned HashByteCount =
1287+
((EntropyForIntermediateDir + 7) & ~7) / 8;
12801288
static_assert(HashByteCount <= sizeof(uint32_t));
12811289

1282-
// We will never need 32 bits of entropy, so we just assert that now as it
1283-
// means we don't need to worry about overflows in the shifts below.
1284-
static_assert(TotalEntropyBits < 32);
1285-
12861290
// It would be nice if there was a way to statically assert the size of an
1287-
// ArrayRef.
1291+
// ArrayRef. We are always going to copy 32 bits out, and it should always be
1292+
// larger than that.
12881293
assert(I.Hash.size() >= sizeof(uint32_t));
12891294

12901295
uint32_t Hash;
12911296
memcpy(&Hash, I.Hash.data(), sizeof(Hash));
12921297

1293-
for (unsigned IntermediateDir = 0; IntermediateDir < IntermediateDirCount; ++IntermediateDir) {
1294-
unsigned DirValue = Hash & ((1u << EntropyPerIntermediateDir) - 1);
1295-
Hash >>= EntropyPerIntermediateDir;
1296-
char DirNameCharacters[CharactersPerDirName];
1297-
for (unsigned CharIdx = 0; CharIdx < CharactersPerDirName; ++CharIdx) {
1298-
// Mod and divide for clarity, this gets lowered to bit operations as
1299-
// expected.
1300-
unsigned CharValue = DirValue % 16;
1301-
DirValue /= 16;
1302-
DirNameCharacters[CharIdx] =
1303-
hexdigit(CharValue, /*LowerCase=*/false);
1304-
}
1305-
sys::path::append(PersistentPath, StringRef(DirNameCharacters, CharactersPerDirName));
1298+
// Mask off the unnecessary bits. The result is that the last character in the
1299+
// name may have less variance, but as we care about the total entropy of the
1300+
// name, this is fine.
1301+
unsigned DirValue = Hash & ((1u << EntropyForIntermediateDir) - 1);
1302+
1303+
char DirNameCharacters[CharactersForIntermediateDirName];
1304+
for (unsigned CharIdx = 0; CharIdx < CharactersForIntermediateDirName; ++CharIdx) {
1305+
// Mod and divide for clarity, this gets lowered to bitwise operations as
1306+
// expected.
1307+
unsigned CharValue = DirValue % 16;
1308+
DirValue /= 16;
1309+
DirNameCharacters[CharIdx] =
1310+
hexdigit(CharValue, /*LowerCase=*/false);
13061311
}
1312+
1313+
StringRef IntermediateDirName =
1314+
StringRef(DirNameCharacters, CharactersForIntermediateDirName);
1315+
sys::path::append(PersistentPath, IntermediateDirName);
1316+
1317+
// Use a lambda here so that we can safely use a Twine repeatedly rather than
1318+
// performing an allocation, or duplicating the file name construction.
13071319
auto AppendFileName = [&](Twine FileName) {
13081320
sys::path::append(PersistentPath, FileName);
1309-
13101321
if (TempPath)
13111322
sys::path::append(*TempPath, FileName);
13121323
};

0 commit comments

Comments
 (0)