From 0ae35c0efc02422f577bc0336ce9cef14b8339a0 Mon Sep 17 00:00:00 2001
From: Lukas Eipert
Date: Wed, 12 Apr 2023 14:33:48 +0200
Subject: [PATCH] [perf] `ExportMap`: Improve `ExportMap.for` performance on
 larger codebases

While looking at a larger code base (https://gitlab.com/gitlab-org/gitlab),
`ExportMap.for` seems to take a significant amount of time. More than half of
it is spent on calculating hashes with `hashObject`.

Digging a little deeper, it seems like we are calling it around 500 thousand
times. Each call calculates the hash of a context object. This context object
is created inside of the `childContext` function and consists of four parts:

- `settings` -> an Object
- `parserOptions` -> an Object
- `parserPath` -> a String
- `path` -> a String

Interestingly, `settings`, `parserOptions` and `parserPath` rarely change for
us, so calculating their hashes on every call seems unnecessary. `hashObject`
recursively calculates the hashes of each key / value pair.

So instead of doing:

```js
cacheKey = hashObject({ settings, parserOptions, parserPath, path })
```

we could also do:

```js
cacheKey = parserPath + hashObject(parserOptions) + hashObject(settings) + path
```

This is just as stable as before, although it results in longer cache keys.
`parserPath` and `path` do not need to be hashed, because they are strings and
a single character change in either already results in a different cache key.

Furthermore, we can memoize the hashes of `parserOptions` and `settings`, in
case they didn't change compared to the previous call. The equality check is a
simple `JSON.stringify` comparison (see the sketch at the end of this message).

We move this `cacheKey` calculation into `childContext`, adding the cache key
to the `context` object. This way, we can fall back to the old calculation
inside of `ExportMap.for`, as it is a public interface which consumers might
be using.

In our code base the results speak for themselves.

Before the change:

- 51.59s spent in `ExportMap.for`, 0ms spent in `childContext`
- 16.89s spent in node:crypto/hash `update` (overall)

After the change:

- 41.02s spent in `ExportMap.for`, 1.91s spent in `childContext`
- Almost no time spent in `hashObject`; all remaining calls in our flame graph
  come from other code paths
- 7.86s spent in node:crypto/hash `update` (overall)

So on this machine and project, we are cutting the execution time of
`ExportMap.for` in half. On machines that hash more slowly, the effect might be
more pronounced; similarly on machines with less memory, as the `hashObject`
function creates a lot of tiny strings.

One side effect is that the memoization is inefficient if `settings` or
`parserOptions` change often. (I cannot think of such a scenario, but I am not
that versed in the use cases of this plugin.) But even then, the overhead
should mainly be the `JSON.stringify` comparison.
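For illustration only, here is a minimal standalone sketch of the memoized
cache-key computation described above. It uses `crypto` directly as a stand-in
for `eslint-module-utils`' `hashObject` (which returns a hash object rather
than a hex string), and `makeContextCacheKey` is a hypothetical name; the
actual change lives in `childContext`, as shown in the diff below.

```js
const { createHash } = require('crypto');

// Illustrative stand-in for `hashObject` from eslint-module-utils; the real
// helper hashes the object recursively and returns a hash object instead.
function hashObject(object) {
  return createHash('sha256').update(JSON.stringify(object)).digest('hex');
}

// Memoize the expensive hashes: `settings` and `parserOptions` rarely change,
// so only re-hash them when their JSON serialization differs from the last call.
let prevSettingsJson = '';
let settingsHash = '';
let prevParserOptionsJson = '';
let parserOptionsHash = '';

// Hypothetical helper demonstrating the key construction used by the patch.
function makeContextCacheKey({ settings, parserOptions, parserPath, path }) {
  const settingsJson = JSON.stringify(settings);
  if (settingsJson !== prevSettingsJson) {
    settingsHash = hashObject({ settings });
    prevSettingsJson = settingsJson;
  }

  const parserOptionsJson = JSON.stringify(parserOptions);
  if (parserOptionsJson !== prevParserOptionsJson) {
    parserOptionsHash = hashObject({ parserOptions });
    prevParserOptionsJson = parserOptionsJson;
  }

  // The string parts need no hashing: any character change in `parserPath`
  // or `path` already produces a different concatenated key.
  return String(parserPath) + parserOptionsHash + settingsHash + String(path);
}
```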
---
 CHANGELOG.md     |  3 +++
 src/ExportMap.js | 21 +++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01ab3f0819..bbfaa0fb98 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ This change log adheres to standards from [Keep a CHANGELOG](https://keepachange
 ### Fixed
 - [`no-duplicates`]: remove duplicate identifiers in duplicate imports ([#2577], thanks [@joe-matsec])
 - [`consistent-type-specifier-style`]: fix accidental removal of comma in certain cases ([#2754], thanks [@bradzacher])
+- [Perf] `ExportMap`: Improve `ExportMap.for` performance on larger codebases ([#2756], thanks [@leipert])
 
 ### Changed
 - [Docs] [`no-duplicates`]: fix example schema ([#2684], thanks [@simmo])
@@ -1066,6 +1067,7 @@ for info on changes for earlier releases.
 
 [`memo-parser`]: ./memo-parser/README.md
 
+[#2756]: https://github.com/import-js/eslint-plugin-import/pull/2756
 [#2754]: https://github.com/import-js/eslint-plugin-import/pull/2754
 [#2748]: https://github.com/import-js/eslint-plugin-import/pull/2748
 [#2699]: https://github.com/import-js/eslint-plugin-import/pull/2699
@@ -1737,6 +1739,7 @@ for info on changes for earlier releases.
 [@kylemh]: https://github.com/kylemh
 [@laysent]: https://github.com/laysent
 [@le0nik]: https://github.com/le0nik
+[@leipert]: https://github.com/leipert
 [@lemonmade]: https://github.com/lemonmade
 [@lencioni]: https://github.com/lencioni
 [@leonardodino]: https://github.com/leonardodino
diff --git a/src/ExportMap.js b/src/ExportMap.js
index 7b8c883143..d31375c83d 100644
--- a/src/ExportMap.js
+++ b/src/ExportMap.js
@@ -305,7 +305,7 @@ ExportMap.get = function (source, context) {
 ExportMap.for = function (context) {
   const { path } = context;
 
-  const cacheKey = hashObject(context).digest('hex');
+  const cacheKey = context.cacheKey || hashObject(context).digest('hex');
   let exportMap = exportCache.get(cacheKey);
 
   // return cached ignore
@@ -559,7 +559,7 @@ ExportMap.parse = function (path, content, context) {
       if (tsConfigInfo.tsConfigPath !== undefined) {
         // Projects not using TypeScript won't have `typescript` installed.
         if (!ts) { ts = require('typescript'); } // eslint-disable-line import/no-extraneous-dependencies
-        
+
         const configFile = ts.readConfigFile(tsConfigInfo.tsConfigPath, ts.sys.readFile);
         return ts.parseJsonConfigFileContent(
           configFile.config,
@@ -781,12 +781,29 @@ export function recursivePatternCapture(pattern, callback) {
   }
 }
 
+let parserOptionsHash = '';
+let prevParserOptions = '';
+let settingsHash = '';
+let prevSettings = '';
 /**
  * don't hold full context object in memory, just grab what we need.
+ * also calculate a cacheKey, where parts of the cacheKey hash are memoized
  */
 function childContext(path, context) {
   const { settings, parserOptions, parserPath } = context;
+
+  if (JSON.stringify(settings) !== prevSettings) {
+    settingsHash = hashObject({ settings }).digest('hex');
+    prevSettings = JSON.stringify(settings);
+  }
+
+  if (JSON.stringify(parserOptions) !== prevParserOptions) {
+    parserOptionsHash = hashObject({ parserOptions }).digest('hex');
+    prevParserOptions = JSON.stringify(parserOptions);
+  }
+
   return {
+    cacheKey: String(parserPath) + parserOptionsHash + settingsHash + String(path),
     settings,
     parserOptions,
     parserPath,