diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2f0c0ffeb6..4040eed7df 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -56,6 +56,21 @@ jobs:
       - attach_workspace:
           at: ~/repo
       - link-check-changed-files
+
+  broken-link-check-prod:
+    executor: node-executor
+    working_directory: ~/repo
+    steps:
+      - checkout
+      - restore_cache:
+          keys:
+            - yarn-cache-{{ checksum "yarn.lock" }}-{{ checksum "patches/github-slugger+1.3.0.patch" }}-{{ checksum "patches/vue-scrollactive+0.9.3.patch" }}-{{ checksum "patches/@docsearch+js+1.0.0-alpha.28.patch" }}-{{ checksum "patches/@nuxt+vue-app+2.14.12.patch" }}
+      - run:
+          name: Install Dependencies
+          command: yarn --frozen-lockfile
+      - run:
+          name: Broken link checker
+          command: yarn broken-link-checker:prod
 
   release:
     executor: node-executor
@@ -110,3 +125,14 @@ workflows:
             branches:
               only:
                 - master
+
+  nightly:
+    triggers:
+      - schedule:
+          cron: "0 0 * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - broken-link-check-prod
diff --git a/package.json b/package.json
index ebb82e1731..4e8c41bfa4 100644
--- a/package.json
+++ b/package.json
@@ -13,6 +13,7 @@
     "start": "cross-env NODE_ENV=development nuxt",
     "start:ci": "cross-env NODE_ENV=test yarn build && cd dist/ && yarn serve:dist",
     "changed-files-broken-link-checker:ci": "node scripts/changedFilesBrokenLinkChecker.js",
+    "broken-link-checker:prod": "node scripts/recursiveBrokenLinkChecker.js",
     "serve:dist": "node scripts/server.js",
     "lint:js": "eslint --ext .js,.vue --ignore-path .gitignore .",
     "lint:style": "stylelint **/*.{vue,css} --ignore-path .gitignore",
diff --git a/scripts/changedFilesBrokenLinkChecker.js b/scripts/changedFilesBrokenLinkChecker.js
index 3a22e28896..e594d702d7 100644
--- a/scripts/changedFilesBrokenLinkChecker.js
+++ b/scripts/changedFilesBrokenLinkChecker.js
@@ -2,18 +2,8 @@
 const { execSync, fork } = require('child_process')
 const { HtmlUrlChecker } = require('broken-link-checker')
 const chalk = require('chalk')
-
-const logger = {
-  log: (...args) => {
-    console.log(`${chalk.yellow(`[${new Date().toISOString()}]:`)} `, ...args)
-  },
-  error: (...args) => {
-    console.error(
-      `${chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)} `,
-      ...args
-    )
-  },
-}
+const { logger } = require('./utils/logger')
+const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
 
 const GIT_DIFF_NAME_STATUS_LAST_COMMIT = 'git diff --name-status HEAD~1'
 const MARKDOWN_EXTENSION = '.md'
@@ -138,14 +128,6 @@ const getGitDiffList = () => {
   return []
 }
 
-const prettyPrintStatusCode = (statusCode) => {
-  if (statusCode >= 400) {
-    return chalk.bgRed(`ERROR ⛔️`)
-  }
-
-  return chalk.green(`OK ✅`)
-}
-
 const makeSiteCheckerForUrl = (url) => {
   return async () => {
     return new Promise((resolve, reject) => {
diff --git a/scripts/recursiveBrokenLinkChecker.js b/scripts/recursiveBrokenLinkChecker.js
new file mode 100644
index 0000000000..3289dbee39
--- /dev/null
+++ b/scripts/recursiveBrokenLinkChecker.js
@@ -0,0 +1,137 @@
+/* eslint-disable no-console */
+const { SiteChecker } = require('broken-link-checker')
+const chalk = require('chalk')
+const { logger } = require('./utils/logger')
+const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
+
+const DOCS_SITE_URL = 'https://docs.cypress.io/'
+const NOT_FOUND_PAGE_TITLE = '404 | Cypress Documentation'
+
+/**
+ * Returns the first direct child of `node` that satisfies `predicate`, or
+ * undefined when `node` is missing or has no `childNodes`, so lookups can be
+ * chained without throwing on unexpected markup.
+ */
+const findChild = (node, predicate) => {
+  if (!node || !node.childNodes) {
+    return undefined
+  }
+
+  return node.childNodes.find(predicate)
+}
+
+/**
+ * Extracts the text of the title element from the parsed document tree
+ * passed to the `html` handler.
+ *
+ * @param {object} tree parsed HTML document
+ * @returns {string} the title text, or '' when the html, head or title
+ * element, or the title's text node, is absent
+ */
+const getPageTitle = (tree) => {
+  const htmlNode = findChild(tree, (node) => node.tagName === 'html')
+  const headNode = findChild(htmlNode, (node) => node.tagName === 'head')
+  const titleNode = findChild(headNode, (node) => node.tagName === 'title')
+  const titleTextNode = findChild(
+    titleNode,
+    (node) => node.nodeName === '#text'
+  )
+
+  return titleTextNode && titleTextNode.value ? titleTextNode.value : ''
+}
+
+/**
+ * Crawls DOCS_SITE_URL with SiteChecker (external links excluded) and
+ * collects every page whose title contains NOT_FOUND_PAGE_TITLE.
+ *
+ * @returns {Promise<Array<{ originUrl: string, brokenUrl: string }>>}
+ * resolved from the `end` handler with one record per broken page
+ */
+const makeSiteChecker = () => {
+  return new Promise((resolve) => {
+    const brokenLinkRecords = []
+    let numLinksChecked = 0
+    const siteChecker = new SiteChecker(
+      {
+        excludeExternalLinks: true,
+        honorRobotExclusions: false,
+      },
+      {
+        error: (error) => {
+          logger.error('An error occurred', error)
+        },
+        html: (tree, robots, response, pageUrl) => {
+          const currentUrl = response.url
+
+          // A page without a readable title yields '' and is treated as
+          // not being the 404 page instead of crashing the crawl.
+          const is404 = getPageTitle(tree).includes(NOT_FOUND_PAGE_TITLE)
+
+          if (is404) {
+            logger.error(
+              `Broken link found on page ${currentUrl}: ${chalk.bgRed(pageUrl)}`
+            )
+
+            brokenLinkRecords.push({
+              originUrl: currentUrl,
+              brokenUrl: pageUrl,
+            })
+          }
+        },
+        link: (link) => {
+          logger.log(
+            `${prettyPrintStatusCode(link.http.statusCode)} ${
+              link.url.resolved
+            }`
+          )
+
+          numLinksChecked++
+        },
+        end: () => {
+          logger.log(`Finished scanning url ${DOCS_SITE_URL}`)
+          logger.log(`Number of links checked: ${numLinksChecked}`)
+          resolve(brokenLinkRecords)
+        },
+      }
+    )
+
+    logger.log(`🔗 Starting link checker for url: ${DOCS_SITE_URL}`)
+    siteChecker.enqueue(DOCS_SITE_URL)
+  })
+}
+
+/**
+ * Runs the crawl, prints a summary of the broken URLs found and exits with
+ * status 1 when there is at least one.
+ */
+const main = async () => {
+  console.time('recursiveBrokenLinkChecker')
+
+  const brokenLinkRecords = await makeSiteChecker()
+
+  logger.log(
+    `Number of broken URLs found: ${
+      brokenLinkRecords.length
+        ? `${chalk.bgRed(brokenLinkRecords.length)}`
+        : `${chalk.green(brokenLinkRecords.length)} ✅`
+    }`
+  )
+
+  brokenLinkRecords.forEach(({ originUrl, brokenUrl }) => {
+    logger.error(`************************`)
+    logger.error(`Broken URL on page: ${originUrl}`)
+    logger.error(`Broken URL: ${brokenUrl}`)
+  })
+
+  console.timeEnd('recursiveBrokenLinkChecker')
+  if (brokenLinkRecords.length) {
+    process.exit(1)
+  }
+}
+
+// Report an unexpected failure and exit non-zero instead of leaving the
+// promise returned by main() unhandled.
+main().catch((error) => {
+  logger.error('Broken link checker failed unexpectedly', error)
+  process.exit(1)
+})
diff --git a/scripts/utils/logger.js b/scripts/utils/logger.js
new file mode 100644
index 0000000000..66b1ea1215
--- /dev/null
+++ b/scripts/utils/logger.js
@@ -0,0 +1,14 @@
+/* eslint-disable no-console */
+const chalk = require('chalk')
+
+module.exports.logger = {
+  log: (...args) => {
+    console.log(`${chalk.yellow(`[${new Date().toISOString()}]:`)} `, ...args)
+  },
+  error: (...args) => {
+    console.error(
+      `${chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)} `,
+      ...args
+    )
+  },
+}
diff --git a/scripts/utils/prettyPrintStatusCode.js b/scripts/utils/prettyPrintStatusCode.js
new file mode 100644
index 0000000000..355a99b02f
--- /dev/null
+++ b/scripts/utils/prettyPrintStatusCode.js
@@ -0,0 +1,9 @@
+const chalk = require('chalk')
+
+module.exports.prettyPrintStatusCode = (statusCode) => {
+  if (statusCode >= 400) {
+    return chalk.bgRed(`ERROR ⛔️`)
+  }
+
+  return chalk.green(`OK ✅`)
+}