Skip to content

Commit 981ee95

Browse files
mjguziktorvalds
authored andcommitted
vfs: avoid duplicating creds in faccessat if possible
access(2) remains commonly used, for example on exec: access("/etc/ld.so.preload", R_OK) or when running gcc: strace -c gcc empty.c % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 0.00 0.000000 0 42 26 access It falls down to do_faccessat without the AT_EACCESS flag, which in turn results in allocation of new creds in order to modify fsuid/fsgid and caps. This is a very expensive process single-threaded and most notably multi-threaded, with numerous structures getting refed and unrefed on imminent new cred destruction. Turns out for typical consumers the resulting creds would be identical and this can be checked upfront, avoiding the hard work. An access benchmark plugged into will-it-scale running on Cascade Lake shows: test proc before after access1 1 1310582 2908735 (+121%) # distinct files access1 24 4716491 63822173 (+1353%) # distinct files access2 24 2378041 5370335 (+125%) # same file The above benchmarks are not integrated into will-it-scale, but can be found in a pull request: https://github.com/antonblanchard/will-it-scale/pull/36/files Signed-off-by: Mateusz Guzik <[email protected]> Cc: Al Viro <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent a4eecba commit 981ee95

File tree

1 file changed

+37
-1
lines changed

1 file changed

+37
-1
lines changed

fs/open.c

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,37 @@ COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset
368368
* access() needs to use the real uid/gid, not the effective uid/gid.
369369
* We do this by temporarily clearing all FS-related capabilities and
370370
* switching the fsuid/fsgid around to the real ones.
371+
*
372+
* Creating new credentials is expensive, so we try to skip doing it,
373+
* which we can if the result would match what we already got.
371374
*/
375+
static bool access_need_override_creds(int flags)
376+
{
377+
const struct cred *cred;
378+
379+
if (flags & AT_EACCESS)
380+
return false;
381+
382+
cred = current_cred();
383+
if (!uid_eq(cred->fsuid, cred->uid) ||
384+
!gid_eq(cred->fsgid, cred->gid))
385+
return true;
386+
387+
if (!issecure(SECURE_NO_SETUID_FIXUP)) {
388+
kuid_t root_uid = make_kuid(cred->user_ns, 0);
389+
if (!uid_eq(cred->uid, root_uid)) {
390+
if (!cap_isclear(cred->cap_effective))
391+
return true;
392+
} else {
393+
if (!cap_isidentical(cred->cap_effective,
394+
cred->cap_permitted))
395+
return true;
396+
}
397+
}
398+
399+
return false;
400+
}
401+
372402
static const struct cred *access_override_creds(void)
373403
{
374404
const struct cred *old_cred;
@@ -378,6 +408,12 @@ static const struct cred *access_override_creds(void)
378408
if (!override_cred)
379409
return NULL;
380410

411+
/*
412+
* XXX access_need_override_creds performs checks in hopes of skipping
413+
* this work. Make sure it stays in sync if making any changes in this
414+
* routine.
415+
*/
416+
381417
override_cred->fsuid = override_cred->uid;
382418
override_cred->fsgid = override_cred->gid;
383419

@@ -437,7 +473,7 @@ static long do_faccessat(int dfd, const char __user *filename, int mode, int fla
437473
if (flags & AT_EMPTY_PATH)
438474
lookup_flags |= LOOKUP_EMPTY;
439475

440-
if (!(flags & AT_EACCESS)) {
476+
if (access_need_override_creds(flags)) {
441477
old_cred = access_override_creds();
442478
if (!old_cred)
443479
return -ENOMEM;

0 commit comments

Comments
 (0)