Skip to content

dx-31: daily CI cron to recursively scan docs site and check for broken urls #3767

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,21 @@ jobs:
- attach_workspace:
at: ~/repo
- link-check-changed-files

# Job: installs dependencies and runs the `broken-link-checker:prod` yarn script.
broken-link-check-prod:
executor: node-executor
working_directory: ~/repo
steps:
- checkout
# Restore the yarn cache; the key is built from checksums of yarn.lock and the patch files.
- restore_cache:
keys:
- yarn-cache-{{ checksum "yarn.lock" }}-{{ checksum "patches/github-slugger+1.3.0.patch" }}-{{ checksum "patches/vue-scrollactive+0.9.3.patch" }}-{{ checksum "patches/@docsearch+js+1.0.0-alpha.28.patch" }}-{{ checksum "patches/@nuxt+vue-app+2.14.12.patch" }}
- run:
name: Install Dependencies
# --frozen-lockfile makes the install fail instead of silently updating yarn.lock.
command: yarn --frozen-lockfile
- run:
name: Broken link checker
command: yarn broken-link-checker:prod

release:
executor: node-executor
Expand Down Expand Up @@ -110,3 +125,14 @@ workflows:
branches:
only:
- master

# Scheduled workflow: runs the broken-link-check-prod job once a day.
nightly:
triggers:
- schedule:
# Minute 0 of hour 0, every day (NOTE(review): CircleCI is expected to evaluate this in UTC — confirm).
cron: "0 0 * * *"
filters:
branches:
only:
- master
jobs:
- broken-link-check-prod
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"start": "cross-env NODE_ENV=development nuxt",
"start:ci": "cross-env NODE_ENV=test yarn build && cd dist/ && yarn serve:dist",
"changed-files-broken-link-checker:ci": "node scripts/changedFilesBrokenLinkChecker.js",
"broken-link-checker:prod": "node scripts/recursiveBrokenLinkChecker.js",
"serve:dist": "node scripts/server.js",
"lint:js": "eslint --ext .js,.vue --ignore-path .gitignore .",
"lint:style": "stylelint **/*.{vue,css} --ignore-path .gitignore",
Expand Down
22 changes: 2 additions & 20 deletions scripts/changedFilesBrokenLinkChecker.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,8 @@
const { execSync, fork } = require('child_process')
const { HtmlUrlChecker } = require('broken-link-checker')
const chalk = require('chalk')

/**
 * Minimal console logger that prefixes every message with a colored
 * ISO-8601 timestamp.
 *
 * - `log` writes to stdout with a yellow timestamp.
 * - `error` writes to stderr with a red-background timestamp and ERROR tag.
 *
 * Any extra arguments are forwarded to the console unchanged.
 */
const logger = {
  log(...args) {
    const prefix = chalk.yellow(`[${new Date().toISOString()}]:`)

    console.log(`${prefix} `, ...args)
  },
  error(...args) {
    const prefix = chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)

    console.error(`${prefix} `, ...args)
  },
}
const { logger } = require('./utils/logger')
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')

const GIT_DIFF_NAME_STATUS_LAST_COMMIT = 'git diff --name-status HEAD~1'
const MARKDOWN_EXTENSION = '.md'
Expand Down Expand Up @@ -138,14 +128,6 @@ const getGitDiffList = () => {
return []
}

/**
 * Turns an HTTP status code into a short colored label for log output.
 *
 * @param {number} statusCode - HTTP status code of the checked link
 * @returns {string} a red "ERROR" label for codes >= 400, otherwise a green "OK" label
 */
const prettyPrintStatusCode = (statusCode) =>
  statusCode >= 400 ? chalk.bgRed(`ERROR ⛔️`) : chalk.green(`OK ✅`)

const makeSiteCheckerForUrl = (url) => {
return async () => {
return new Promise((resolve, reject) => {
Expand Down
103 changes: 103 additions & 0 deletions scripts/recursiveBrokenLinkChecker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/* eslint-disable no-console */
const { SiteChecker } = require('broken-link-checker')
const chalk = require('chalk')
const { logger } = require('./utils/logger')
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')

const DOCS_SITE_URL = 'https://docs.cypress.io/'

/** Title text used to recognise the docs site's "not found" page. */
const NOT_FOUND_PAGE_TITLE = '404 | Cypress Documentation'

/**
 * Finds the first direct child of `parent` that satisfies `predicate`.
 * Returns `undefined` instead of throwing when `parent` is missing or has
 * no `childNodes` array.
 */
const findChildNode = (parent, predicate) => {
  if (!parent || !Array.isArray(parent.childNodes)) {
    return undefined
  }

  return parent.childNodes.find(predicate)
}

/**
 * Reads the text of `<html><head><title>` from a parsed document tree.
 *
 * @param {object} tree - document node passed to the `html` handler
 * @returns {string} value of the title's first text node, or `''` when any
 *   part of the html > head > title > #text path is missing
 */
const getDocumentTitle = (tree) => {
  const htmlNode = findChildNode(tree, (node) => node.tagName === 'html')
  const headNode = findChildNode(htmlNode, (node) => node.tagName === 'head')
  const titleNode = findChildNode(headNode, (node) => node.tagName === 'title')
  const titleTextNode = findChildNode(
    titleNode,
    (node) => node.nodeName === '#text'
  )

  return titleTextNode && typeof titleTextNode.value === 'string'
    ? titleTextNode.value
    : ''
}

/**
 * Crawls DOCS_SITE_URL with `SiteChecker` and collects every fetched page
 * whose title identifies it as the site's 404 page.
 *
 * Errors reported through the `error` handler are logged only; the promise
 * never rejects and resolves when the checker signals `end`.
 *
 * @returns {Promise<Array<{ originUrl: string, brokenUrl: string }>>}
 *   one record per 404 page; `originUrl` is taken from `response.url` and
 *   `brokenUrl` from the handler's `pageUrl` argument
 */
const makeSiteChecker = () => {
  return new Promise((resolve) => {
    const brokenLinkRecords = []
    let numLinksChecked = 0

    const siteChecker = new SiteChecker(
      {
        excludeExternalLinks: true,
        honorRobotExclusions: false,
      },
      {
        error: (error) => {
          logger.error('An error occurred', error)
        },
        html: (tree, robots, response, pageUrl) => {
          const currentUrl = response.url

          // A page without a readable <title> is treated as "not a 404"
          // rather than crashing the whole scan with a TypeError.
          const is404 = getDocumentTitle(tree).includes(NOT_FOUND_PAGE_TITLE)

          if (!is404) {
            return
          }

          logger.error(
            `Broken link found on page ${currentUrl}: ${chalk.bgRed(pageUrl)}`
          )

          brokenLinkRecords.push({
            originUrl: currentUrl,
            brokenUrl: pageUrl,
          })
        },
        link: (link) => {
          // NOTE(review): some broken-link-checker versions may expose the
          // status under `link.http.response.statusCode` — confirm against
          // the installed version, otherwise every link prints as OK.
          logger.log(
            `${prettyPrintStatusCode(link.http.statusCode)} ${
              link.url.resolved
            }`
          )

          numLinksChecked++
        },
        end: () => {
          logger.log(`Finished scanning url ${DOCS_SITE_URL}`)
          logger.log(`Number of links checked: ${numLinksChecked}`)
          resolve(brokenLinkRecords)
        },
      }
    )

    logger.log(`🔗 Starting link checker for url: ${DOCS_SITE_URL}`)
    siteChecker.enqueue(DOCS_SITE_URL)
  })
}

/**
 * Script entry point: runs the site scan, prints a summary of the broken
 * URLs that were found, and exits with status 1 when there is at least one.
 * The total run time is reported through console.time/console.timeEnd.
 */
const main = async () => {
  const timerLabel = 'recursiveBrokenLinkChecker'

  console.time(timerLabel)

  const brokenLinkRecords = await makeSiteChecker()
  const brokenCount = brokenLinkRecords.length

  // Red count when something is broken, green count plus check mark otherwise.
  let coloredCount
  if (brokenCount) {
    coloredCount = `${chalk.bgRed(brokenCount)}`
  } else {
    coloredCount = `${chalk.green(brokenCount)} ✅`
  }

  logger.log(`Number of broken URLs found: ${coloredCount}`)

  for (const record of brokenLinkRecords) {
    logger.error(`************************`)
    logger.error(`Broken URL on page: ${record.originUrl}`)
    logger.error(`Broken URL: ${record.brokenUrl}`)
  }

  console.timeEnd(timerLabel)

  // A non-zero exit code is what makes the CI job fail.
  if (brokenCount) {
    process.exit(1)
  }
}

// Start the scan as soon as this script is loaded.
main()
14 changes: 14 additions & 0 deletions scripts/utils/logger.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* eslint-disable no-console */
const chalk = require('chalk')

/**
 * Shared console logger for the scripts in this directory.
 *
 * Each message is prefixed with a colored ISO-8601 timestamp:
 * - `log` prints to stdout with a yellow timestamp.
 * - `error` prints to stderr with a red-background timestamp and ERROR tag.
 *
 * Any extra arguments are forwarded to the console unchanged.
 */
module.exports.logger = {
  log(...args) {
    const prefix = chalk.yellow(`[${new Date().toISOString()}]:`)

    console.log(`${prefix} `, ...args)
  },
  error(...args) {
    const prefix = chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)

    console.error(`${prefix} `, ...args)
  },
}
9 changes: 9 additions & 0 deletions scripts/utils/prettyPrintStatusCode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const chalk = require('chalk')

/**
 * Turns an HTTP status code into a short colored label for log output.
 *
 * @param {number} statusCode - HTTP status code of the checked link
 * @returns {string} a red "ERROR" label for codes >= 400, otherwise a green "OK" label
 */
module.exports.prettyPrintStatusCode = (statusCode) =>
  statusCode >= 400 ? chalk.bgRed(`ERROR ⛔️`) : chalk.green(`OK ✅`)