From d09b710da49100d2def6ba853c3a3093aa452644 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 23 Apr 2023 18:14:31 +0200 Subject: [PATCH] Change realpath cache hash algorithm to the regular string hash algorithm Right now the FNV-1 algorithm is used to determine the realpath cache key. For applications that are light-weight, but have lots of files (e.g. WordPress), the realpath cache key computation shows up in the Callgrind profile. The reason is that we do a simple byte-by-byte loop. Furthermore, we always use the 32-bit prime and offset values, even in a 64-bit environment, which reduces the diffusion property of the hash. This hinders the distribution of keys a bit (although probably not a lot since we have only limited entries in the cache). I propose to switch to our regular string hashing algorithm, which is better optimised than a byte-by-byte loop, and has better diffusion on 64-bit systems. I don't know why FNV-1 was chosen over the DJB33X algorithm we use in the normal string hashing. Also, I don't know why FNV-1A wasn't chosen instead of FNV-1, which would be a simple modification and would distribute the hashes better than FNV-1. The only thing I can think of is that typically FNV-1A has a better distribution than DJB33X algorithms like the one we use for string hashing [1]. But I doubt that makes a difference here, and if it does then we should perhaps look into changing the string hash algorithm from DJB33X to FNV-1A. 
[1] https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed --- Zend/zend_virtual_cwd.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/Zend/zend_virtual_cwd.c b/Zend/zend_virtual_cwd.c index 6bff2ad984d02..7806002d37294 100644 --- a/Zend/zend_virtual_cwd.c +++ b/Zend/zend_virtual_cwd.c @@ -317,23 +317,16 @@ CWD_API char *virtual_getcwd(char *buf, size_t size) /* {{{ */ #ifdef ZEND_WIN32 static inline zend_ulong realpath_cache_key(const char *path, size_t path_len) /* {{{ */ { - zend_ulong h; size_t bucket_key_len; - const char *bucket_key_start = tsrm_win32_get_path_sid_key(path, path_len, &bucket_key_len); - const char *bucket_key = bucket_key_start; - const char *e; + const char *bucket_key = tsrm_win32_get_path_sid_key(path, path_len, &bucket_key_len); if (!bucket_key) { return 0; } - e = bucket_key + bucket_key_len; - for (h = Z_UL(2166136261); bucket_key < e;) { - h *= Z_UL(16777619); - h ^= *bucket_key++; - } - if (bucket_key_start != path) { - HeapFree(GetProcessHeap(), 0, (LPVOID)bucket_key_start); + zend_ulong h = zend_hash_func(bucket_key, bucket_key_len); + if (bucket_key != path) { + HeapFree(GetProcessHeap(), 0, (LPVOID)bucket_key); } return h; } @@ -341,15 +334,7 @@ static inline zend_ulong realpath_cache_key(const char *path, size_t path_len) / #else static inline zend_ulong realpath_cache_key(const char *path, size_t path_len) /* {{{ */ { - zend_ulong h; - const char *e = path + path_len; - - for (h = Z_UL(2166136261); path < e;) { - h *= Z_UL(16777619); - h ^= *path++; - } - - return h; + return zend_hash_func(path, path_len); } /* }}} */ #endif /* defined(ZEND_WIN32) */