diff --git a/lib/languages.js b/lib/languages.js index 77e0cc6956c1..2289886d7eb7 100644 --- a/lib/languages.js +++ b/lib/languages.js @@ -117,7 +117,8 @@ if (process.env.ENABLED_LANGUAGES) { Object.keys(languages).forEach((code) => { if (!process.env.ENABLED_LANGUAGES.includes(code)) delete languages[code] }) - console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`) + // This makes the translation health report not valid JSON + // console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`) } } else if (process.env.NODE_ENV === 'test') { // Unless explicitly set, when running tests default to just English diff --git a/lib/page-data.js b/lib/page-data.js index b5e1e8170b40..cea16abff99c 100644 --- a/lib/page-data.js +++ b/lib/page-data.js @@ -112,12 +112,13 @@ async function translateTree(dir, langObj, enTree) { // has something wrong with, say, the `versions` frontmatter key // we don't even care because we won't be using it anyway. if (translatableFrontmatterKeys.includes(property)) { - const msg = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English` + const message = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English` if (DEBUG_TRANSLATION_FALLBACKS) { - console.warn(msg) + // The object format is so the health report knows which path the issue is on + console.warn({ message, path: relativePath }) } if (THROW_TRANSLATION_ERRORS) { - throw new Error(msg) + throw new Error(message) } data[property] = enData[property] } @@ -128,12 +129,13 @@ async function translateTree(dir, langObj, enTree) { if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) { data = enData content = enPage.markdown - const msg = `Unable to initialized ${fullPath} because translation content file does not exist.` + const message = `Unable to initialize ${fullPath} because translation content file does not exist.` if (DEBUG_TRANSLATION_FALLBACKS) { - console.warn(msg) + // The object format is so the 
health report knows which path the issue is on + console.warn({ message, path: relativePath }) } if (THROW_TRANSLATION_ERRORS) { - throw new Error(msg) + throw new Error(message) } } else { throw error diff --git a/lib/redirects/precompile.js b/lib/redirects/precompile.js index abf87a6498c4..f7b7f6ca00b7 100755 --- a/lib/redirects/precompile.js +++ b/lib/redirects/precompile.js @@ -12,7 +12,7 @@ const EXCEPTIONS_FILE = path.join(__dirname, './static/redirect-exceptions.txt') // This function runs at server warmup and precompiles possible redirect routes. // It outputs them in key-value pairs within a neat Javascript object: { oldPath: newPath } -async function precompileRedirects(pageList) { +export async function precompileRedirects(pageList) { const allRedirects = readCompressedJsonFileFallback('./lib/redirects/static/developer.json') const externalRedirects = readCompressedJsonFileFallback('./lib/redirects/external-sites.json') diff --git a/lib/render-content/plugins/rewrite-asset-urls.js b/lib/render-content/plugins/rewrite-asset-urls.js index b2ec322ea20d..a0c344513107 100644 --- a/lib/render-content/plugins/rewrite-asset-urls.js +++ b/lib/render-content/plugins/rewrite-asset-urls.js @@ -49,9 +49,7 @@ function getNewSrc(node) { } catch (err) { console.warn( `Failed to get a hash for ${src} ` + - '(This is mostly harmless and can happen with outdated translations). ' + - 'Full error output:', - err + '(This is mostly harmless and can happen with outdated translations).' ) } } diff --git a/script/i18n/create-translation-health-report.js b/script/i18n/create-translation-health-report.js index d958a9de2648..ceb83cec2f0d 100755 --- a/script/i18n/create-translation-health-report.js +++ b/script/i18n/create-translation-health-report.js @@ -9,155 +9,120 @@ /* Nota bene: If you are getting more errors all the sudden, try running this: $ script/i18n/create-translation-health-report.js -l en -r 000 - If there's any errors, const context = { ... 
} probably needs more data. + If there are any errors before getting the JSON output, + const context = { ... } probably needs more data. */ import { program } from 'commander' import fs from 'fs/promises' -import { pick } from 'lodash-es' - -import { loadPages, loadPageMap } from '../../lib/page-data.js' -import loadSiteData from '../../lib/site-data.js' -import loadRedirects from '../../lib/redirects/precompile.js' -import { allVersions, allVersionKeys } from '../../lib/all-versions.js' -import { languageKeys } from '../../lib/languages.js' -import { getProductStringFromPath } from '../../lib/path-utils.js' program .description('Create a translation health report for one language.') .requiredOption('-l, --language ', 'The language to health check') - .requiredOption('-r, --gitref ', 'Language repo latest git commit short SHA') + .option('-r, --gitref ', 'Language repo latest git commit short SHA') .parse(process.argv) -// Gather popularity data the search uses to prioritize errors -async function fetchPopularityData() { - const output = {} - const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8') - for (const line of popularPagesRaw.split('\n')) { - try { - const row = JSON.parse(line) - output[row.path_article] = row.path_count - } catch {} - } - return output -} +// Warn about fallbacks to English instead of throwing; throwing stops everything, but we want it to continue to generate the full report +process.env.DEBUG_TRANSLATION_FALLBACKS = true +// Only enable English and the language being checked +process.env.ENABLED_LANGUAGES = `en,${program.opts().language}` -async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) { - // Go through each version... 
- const promises = allVersionKeys - .filter((version) => page.applicableVersions.includes(version)) - .map(async (version) => { - // Collect if errors - const pageVersionErrors = [] - try { - const path = `/${language}/${version}/${plainPath}` - // Reference middleware/context.js for data shape - const context = { - ...data, // needed for all pages - currentVersion: version, // needed for all pages - currentLanguage: language, // needed for all pages - currentPath: path, // needed for all pages - currentVersionObj: allVersions[version], // needed for ifversion tag - currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages - pages: pageMap, // needed for learning-track on guides pages - redirects, // needed for learning-track on guides pages - } - await page.render(context, pageVersionErrors) - } catch (err) { - pageVersionErrors.push(err) - } - if (pageVersionErrors.length) { - return [ - version, - // Filter down properties to make it easier for - // translators to get the clearest information on the error - pageVersionErrors.map((err) => pick(err, ['name', 'message', 'token.content'])), - ] - // Other fields: Object.getOwnPropertyNames(err) - } - }) - const arr = (await Promise.all(promises)).filter(Boolean) - if (arr.length) { - return Object.fromEntries(arr) - } -} +// In debug mode, it will call console.warn ... 
so overriding :) +// Want to make sure the result is valid JSON +const prevConsoleWarn = console.warn +const prevConsoleError = console.error -function groupErrors(errors) { - return errors - .map((page) => Object.values(page.versions).flat()) - .flat() - .map((version) => version.message) - .reduce((sum, val) => { - sum[val] = sum[val] || 0 - sum[val]++ - return sum - }, {}) +let issues = [] +console.warn = console.error = (...args) => { + if (args.length > 1) { + issues.push({ message: args.map(String).join(' '), score: 0 }) + } else if (typeof args[0] === 'string') { + issues.push({ message: args[0], score: 0 }) + } else if (args[0]?.constructor === Object) { + const path = args[0].path?.replace('/index.md', '').replace('.md', '') + issues.push({ path, message: args[0].message, score: scores[path] || 0 }) + } } -async function createReport() { - // Check that the language is valid - const { language, gitref } = program.opts() - if (!languageKeys.includes(language)) { - throw new Error(`Language ${language} is not in ${languageKeys.join()}.`) - } +// Weird import syntax, but forces it to load after process.env... 
changes +const { languageKeys } = await import('../../lib/languages.js') +const { loadPages, loadPageMap } = await import('../../lib/page-data.js') +const { precompileRedirects } = await import('../../lib/redirects/precompile.js') +const { allVersions, allVersionKeys } = await import('../../lib/all-versions.js') +const { getProductStringFromPath } = await import('../../lib/path-utils.js') - // Load popularity data to sort errors - const popularity = await fetchPopularityData() +// Check that the language is valid +const { language, gitref } = program.opts() +if (!languageKeys.includes(language)) { + throw new Error(`Language ${language} is not in ${languageKeys.join()}.`) +} - // Load all pages - const allPages = await loadPages() - const dataErrors = [] - const data = loadSiteData(dataErrors)[language] - const pages = allPages - .filter((page) => page.languageCode === language) - // Early access pages log to the console, which would show in the report - .filter((page) => !page.relativePath.includes('early-access')) - const pageMap = await loadPageMap(pages) - const redirects = await loadRedirects(pages) +// Gather popularity data the search uses to prioritize errors +const scores = {} +const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8') +for (const line of popularPagesRaw.split('\n')) { + try { + const row = JSON.parse(line) + scores[row.path_article] = row.path_count + } catch {} +} - // Try to render each page - const pageErrors = ( - await Promise.all( - pages.map(async (page) => { - const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '') - const errorsByVersion = await collectPageErrors(page, { - language, - data, - redirects, - plainPath, - pageMap, - }) - if (errorsByVersion) { - return { - path: plainPath, - popularity: popularity[plainPath] || 0, - versions: errorsByVersion, - } - } - }) - ) - ) - .filter(Boolean) - // Sort by popularity desc so the translators know what to focus on first - .sort((a, 
b) => b.popularity - a.popularity) +// Load all pages in language +const allPages = await loadPages() +const pages = allPages.filter((page) => page.languageCode === language) +const pageMap = await loadPageMap(pages) +const redirects = await precompileRedirects(pages) - // Begin an output report - const report = { - language, - gitref, - datetime: new Date().toJSON(), - totalPages: pages.length, - totalErrorPages: pageErrors.length, - pageErrors, - // To group errors by message instead - groupedPageErrors: groupErrors(pageErrors), - // Filter down properties to make it easier for - // translators to get the clearest information on the error - dataErrors: dataErrors.map((err) => pick(err, ['name', 'message', 'token.content'])), +// Try to render each page +for (const page of pages) { + const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '') + // Go through each version... + const versions = allVersionKeys.filter((version) => page.applicableVersions.includes(version)) + const pageIssues = {} + for (const version of versions) { + const path = `/${language}/${version}/${plainPath}` + // Reference middleware/context.js for shape + const context = { + currentVersion: version, // needed for all pages + currentLanguage: language, // needed for all pages + currentPath: path, // needed for all pages + currentVersionObj: allVersions[version], // needed for ifversion tag + currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages + pages: pageMap, // needed for learning-track on guides pages + redirects, // needed for learning-track on guides pages + } + try { + await page.render(context) + } catch (err) { + // Which messages apply to which versions + pageIssues[err.message] = pageIssues[err.message] || [] + pageIssues[err.message].push(version) + } + } + if (Object.keys(pageIssues).length) { + issues.push({ + path: plainPath, + messages: pageIssues, + score: scores[plainPath] || 0, + }) } +} + +// Sort by score desc 
so the translators know what to focus on first +// Issues with more information should be higher +issues = issues + .filter((issue) => !issue.message?.includes('early-access')) + .sort((a, b) => b.score - a.score || JSON.stringify(b).length - JSON.stringify(a).length) - return report +// Begin an output report +const report = { + language, + gitref, + datetime: new Date().toJSON(), + issuesCount: issues.length, + issues, } -console.warn = () => {} // shhh -console.log(JSON.stringify(await createReport(), null, 2)) +console.warn = prevConsoleWarn +console.error = prevConsoleError +console.log(JSON.stringify(report, null, 2))