@@ -596,17 +596,15 @@ Error OnDiskGraphDB::TempFile::keep(const Twine &Name) {
596
596
assert (!Done);
597
597
Done = true ;
598
598
// Always try to close and rename.
599
- std::error_code RenameEC = sys::fs::create_directories (
600
- sys::path::parent_path (Name.str ())
601
- );
602
-
603
- // We consider both the creation of the directory for the persistent storage
604
- // and the final rename itself to be a failure to rename. This is consistent
605
- // with other filesystem APIs that provide file manipulation APIs that will
606
- // automatically construct directories as needed, so it seems like
607
- // a reasonable choice.
608
- if (!RenameEC)
609
- RenameEC = sys::fs::rename (TmpName, Name);
599
+ std::error_code RenameEC = sys::fs::rename (TmpName, Name);
600
+ if (RenameEC) {
601
+ // If the rename fails we assume we have not created all the required
602
+ // intermediate directories, so we'll try to do that now, and then
603
+ // retry the rename.
604
+ RenameEC = sys::fs::create_directories (sys::path::parent_path (Name.str ()));
605
+ if (!RenameEC)
606
+ RenameEC = sys::fs::rename (TmpName, Name);
607
+ }
610
608
611
609
if (Logger)
612
610
Logger->log_TempFile_keep (TmpName, Name.str (), RenameEC);
@@ -1269,44 +1267,57 @@ void OnDiskGraphDB::getStandalonePath(StringRef Suffix, const IndexProxy &I,
1269
1267
if (TempPath)
1270
1268
TempPath->assign (RootPath.begin (), RootPath.end ());
1271
1269
1272
- static const unsigned IntermediateDirCount = 2 ;
1273
- static const unsigned EntropyPerIntermediateDir = 8 ;
1274
- static const unsigned CharactersPerDirName =
1275
- ((EntropyPerIntermediateDir + 15 ) & ~7 ) / 8 ;
1276
-
1277
- static const unsigned TotalEntropyBits =
1278
- IntermediateDirCount * EntropyPerIntermediateDir;
1279
- static const unsigned HashByteCount = ((TotalEntropyBits + 7 ) & ~7 ) / 8 ;
1270
+ // 10 bits of entropy should be sufficient, we could go higher, but increasing
1271
+ // this starts increasing the likelihood of collisions in filesystem hash
1272
+ // structures. We could possibly go lower as well, but changing this would
1273
+ // require a version bump, so 10 bits seems like a reasonable balance of
1274
+ // number of fs entries vs path misses.
1275
+ // If we find we need to increase the entropy, other implementations of
1276
+ // directory fanning don't appear to use more than 16 bits of entropy in a
1277
+ // single directory before starting to introduce additional fanning layers.
1278
+ static const unsigned EntropyForIntermediateDir = 10 ;
1279
+ static_assert (EntropyForIntermediateDir < 32 ,
1280
+ " The logic below requires `EntropyForIntermediateDir` to be "
1281
+ " less than 32 bits." );
1282
+
1283
+ // We're using hex, so we get 4 bits per character
1284
+ static const unsigned CharactersForIntermediateDirName =
1285
+ (EntropyForIntermediateDir + 3 ) / 4 ;
1286
+ static const unsigned HashByteCount =
1287
+ ((EntropyForIntermediateDir + 7 ) & ~7 ) / 8 ;
1280
1288
static_assert (HashByteCount <= sizeof (uint32_t ));
1281
1289
1282
- // We will never need 32 bits of entropy, so we just assert that now as it
1283
- // means we don't need to worry about overflows in the shifts below.
1284
- static_assert (TotalEntropyBits < 32 );
1285
-
1286
1290
// It would be nice if there was a way to statically assert the size of an
1287
- // ArrayRef.
1291
+ // ArrayRef. We are always going to copy 32 bits out, and it should always be
1292
+ // larger than that.
1288
1293
assert (I.Hash .size () >= sizeof (uint32_t ));
1289
1294
1290
1295
uint32_t Hash;
1291
1296
memcpy (&Hash, I.Hash .data (), sizeof (Hash));
1292
1297
1293
- for ( unsigned IntermediateDir = 0 ; IntermediateDir < IntermediateDirCount; ++IntermediateDir) {
1294
- unsigned DirValue = Hash & (( 1u << EntropyPerIntermediateDir) - 1 );
1295
- Hash >>= EntropyPerIntermediateDir;
1296
- char DirNameCharacters[CharactersPerDirName] ;
1297
- for ( unsigned CharIdx = 0 ; CharIdx < CharactersPerDirName; ++CharIdx) {
1298
- // Mod and divide for clarity, this gets lowered to bit operations as
1299
- // expected.
1300
- unsigned CharValue = DirValue % 16 ;
1301
- DirValue /= 16 ;
1302
- DirNameCharacters[CharIdx] =
1303
- hexdigit (CharValue, /* LowerCase= */ false ) ;
1304
- }
1305
- sys::path::append (PersistentPath, StringRef (DirNameCharacters, CharactersPerDirName) );
1298
+ // Mask off the unnecessary bits. The result is that the last character in the
1299
+ // name may have less variance, but as we care about the total entropy of the
1300
+ // name, this is fine.
1301
+ unsigned DirValue = Hash & (( 1u << EntropyForIntermediateDir) - 1 ) ;
1302
+
1303
+ char DirNameCharacters[CharactersForIntermediateDirName];
1304
+ for ( unsigned CharIdx = 0 ; CharIdx < CharactersForIntermediateDirName; ++CharIdx) {
1305
+ // Mod and divide for clarity, this gets lowered to bitwise operations as
1306
+ // expected.
1307
+ unsigned CharValue = DirValue % 16 ;
1308
+ DirValue /= 16 ;
1309
+ DirNameCharacters[CharIdx] =
1310
+ hexdigit (CharValue, /* LowerCase= */ false );
1306
1311
}
1312
+
1313
+ StringRef IntermediateDirName =
1314
+ StringRef (DirNameCharacters, CharactersForIntermediateDirName);
1315
+ sys::path::append (PersistentPath, IntermediateDirName);
1316
+
1317
+ // Use a lambda here so that we can safely use a Twine repeatedly rather than
1318
+ // performing an allocation, or duplicating the file name construction.
1307
1319
auto AppendFileName = [&](Twine FileName) {
1308
1320
sys::path::append (PersistentPath, FileName);
1309
-
1310
1321
if (TempPath)
1311
1322
sys::path::append (*TempPath, FileName);
1312
1323
};
0 commit comments