Skip to content

Commit cd21ce4

Browse files
authored
Merge pull request #3767 from cypress-io/dx-31-daily-ci-cron-to-check-all-links-with-a
dx-31: daily CI cron to recursively scan docs site and check for broken urls
2 parents b21e086 + 129b89c commit cd21ce4

File tree

6 files changed

+155
-20
lines changed

6 files changed

+155
-20
lines changed

.circleci/config.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,21 @@ jobs:
5656
- attach_workspace:
5757
at: ~/repo
5858
- link-check-changed-files
59+
60+
broken-link-check-prod:
61+
executor: node-executor
62+
working_directory: ~/repo
63+
steps:
64+
- checkout
65+
- restore_cache:
66+
keys:
67+
- yarn-cache-{{ checksum "yarn.lock" }}-{{ checksum "patches/github-slugger+1.3.0.patch" }}-{{ checksum "patches/vue-scrollactive+0.9.3.patch" }}-{{ checksum "patches/@docsearch+js+1.0.0-alpha.28.patch" }}-{{ checksum "patches/@nuxt+vue-app+2.14.12.patch" }}
68+
- run:
69+
name: Install Dependencies
70+
command: yarn --frozen-lockfile
71+
- run:
72+
name: Broken link checker
73+
command: yarn broken-link-checker:prod
5974

6075
release:
6176
executor: node-executor
@@ -110,3 +125,14 @@ workflows:
110125
branches:
111126
only:
112127
- master
128+
129+
nightly:
130+
triggers:
131+
- schedule:
132+
cron: "0 0 * * *"
133+
filters:
134+
branches:
135+
only:
136+
- master
137+
jobs:
138+
- broken-link-check-prod

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"start": "cross-env NODE_ENV=development nuxt",
1414
"start:ci": "cross-env NODE_ENV=test yarn build && cd dist/ && yarn serve:dist",
1515
"changed-files-broken-link-checker:ci": "node scripts/changedFilesBrokenLinkChecker.js",
16+
"broken-link-checker:prod": "node scripts/recursiveBrokenLinkChecker.js",
1617
"serve:dist": "node scripts/server.js",
1718
"lint:js": "eslint --ext .js,.vue --ignore-path .gitignore .",
1819
"lint:style": "stylelint **/*.{vue,css} --ignore-path .gitignore",

scripts/changedFilesBrokenLinkChecker.js

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,8 @@
22
const { execSync, fork } = require('child_process')
33
const { HtmlUrlChecker } = require('broken-link-checker')
44
const chalk = require('chalk')
5-
6-
const logger = {
7-
log: (...args) => {
8-
console.log(`${chalk.yellow(`[${new Date().toISOString()}]:`)} `, ...args)
9-
},
10-
error: (...args) => {
11-
console.error(
12-
`${chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)} `,
13-
...args
14-
)
15-
},
16-
}
5+
const { logger } = require('./utils/logger')
6+
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
177

188
const GIT_DIFF_NAME_STATUS_LAST_COMMIT = 'git diff --name-status HEAD~1'
199
const MARKDOWN_EXTENSION = '.md'
@@ -138,14 +128,6 @@ const getGitDiffList = () => {
138128
return []
139129
}
140130

141-
const prettyPrintStatusCode = (statusCode) => {
142-
if (statusCode >= 400) {
143-
return chalk.bgRed(`ERROR ⛔️`)
144-
}
145-
146-
return chalk.green(`OK ✅`)
147-
}
148-
149131
const makeSiteCheckerForUrl = (url) => {
150132
return async () => {
151133
return new Promise((resolve, reject) => {

scripts/recursiveBrokenLinkChecker.js

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/* eslint-disable no-console */
2+
const { SiteChecker } = require('broken-link-checker')
3+
const chalk = require('chalk')
4+
const { logger } = require('./utils/logger')
5+
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
6+
7+
const DOCS_SITE_URL = 'https://docs.cypress.io/'
8+
9+
/**
 * Finds the first direct child of a parsed HTML node that satisfies `predicate`.
 *
 * Returns `undefined` (instead of throwing) when the parent is missing or has
 * no `childNodes` array, so lookups can be chained safely.
 *
 * @param {object|undefined} parentNode - parsed node, or undefined
 * @param {(node: object) => boolean} predicate - test applied to each child
 * @returns {object|undefined} the first matching child, if any
 */
const findChildNode = (parentNode, predicate) => {
  if (!parentNode || !Array.isArray(parentNode.childNodes)) {
    return undefined
  }

  return parentNode.childNodes.find(predicate)
}

/**
 * Reads the text of html > head > title from a parsed document tree.
 *
 * @param {object|undefined} tree - document node handed to the `html` handler
 * @returns {string} the title text, or '' when any node on the path is absent
 */
const getPageTitle = (tree) => {
  const htmlNode = findChildNode(tree, (node) => node.tagName === 'html')
  const headNode = findChildNode(htmlNode, (node) => node.tagName === 'head')
  const titleNode = findChildNode(headNode, (node) => node.tagName === 'title')
  const titleTextNode = findChildNode(
    titleNode,
    (node) => node.nodeName === '#text'
  )

  if (!titleTextNode || typeof titleTextNode.value !== 'string') {
    return ''
  }

  return titleTextNode.value
}

/**
 * Extracts the HTTP status code from a link result.
 *
 * Prefers `link.http.statusCode` (what this script originally read) and falls
 * back to `link.http.response.statusCode`.
 * NOTE(review): broken-link-checker appears to expose the status on
 * `http.response` rather than on `http` itself - confirm against the installed
 * version of the library.
 *
 * @param {object} link - link result passed to the `link` handler
 * @returns {number|undefined} the status code, or undefined when unavailable
 */
const getLinkStatusCode = (link) => {
  const http = (link && link.http) || {}

  if (http.statusCode !== undefined && http.statusCode !== null) {
    return http.statusCode
  }

  return http.response ? http.response.statusCode : undefined
}

/**
 * Crawls DOCS_SITE_URL with a SiteChecker and collects every scanned page
 * whose <title> contains the docs site's 404 title.
 *
 * The returned promise is resolved from the `end` handler; the `error` handler
 * only logs, so nothing in this function rejects the promise.
 *
 * @returns {Promise<Array<{ originUrl: string, brokenUrl: string }>>}
 */
const makeSiteChecker = () => {
  return new Promise((resolve) => {
    const brokenLinkRecords = []
    let numLinksChecked = 0

    const siteChecker = new SiteChecker(
      {
        excludeExternalLinks: true,
        honorRobotExclusions: false,
      },
      {
        error: (error) => {
          logger.error('An error occurred', error)
        },
        html: (tree, robots, response, pageUrl) => {
          // NOTE(review): `response.url` and `pageUrl` both seem to describe
          // the page being scanned rather than the page that linked to it -
          // confirm that `originUrl` carries the intended value.
          const currentUrl = response.url

          // A page with no html/head/title/text node yields '' here and is
          // simply not treated as a 404, instead of throwing a TypeError.
          const is404 = getPageTitle(tree).includes(
            '404 | Cypress Documentation'
          )

          if (is404) {
            logger.error(
              `Broken link found on page ${currentUrl}: ${chalk.bgRed(pageUrl)}`
            )

            brokenLinkRecords.push({
              originUrl: currentUrl,
              brokenUrl: pageUrl,
            })
          }
        },
        link: (link) => {
          logger.log(
            `${prettyPrintStatusCode(getLinkStatusCode(link))} ${
              link.url.resolved
            }`
          )

          numLinksChecked++
        },
        end: () => {
          logger.log(`Finished scanning url ${DOCS_SITE_URL}`)
          logger.log(`Number of links checked: ${numLinksChecked}`)
          resolve(brokenLinkRecords)
        },
      }
    )

    logger.log(`🔗 Starting link checker for url: ${DOCS_SITE_URL}`)
    siteChecker.enqueue(DOCS_SITE_URL)
  })
}
77+
78+
/**
 * Script entry point: runs the site crawl, prints a summary of every broken
 * URL that was recorded, and exits with status 1 when at least one was found.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  console.time('recursiveBrokenLinkChecker')

  const brokenLinkRecords = await makeSiteChecker()

  logger.log(
    `Number of broken URLs found: ${
      brokenLinkRecords.length
        ? `${chalk.bgRed(brokenLinkRecords.length)}`
        : `${chalk.green(brokenLinkRecords.length)} ✅`
    }`
  )

  brokenLinkRecords.forEach(({ originUrl, brokenUrl }) => {
    logger.error(`************************`)
    logger.error(`Broken URL on page: ${originUrl}`)
    logger.error(`Broken URL: ${brokenUrl}`)
  })

  console.timeEnd('recursiveBrokenLinkChecker')
  if (brokenLinkRecords.length) {
    process.exit(1)
  }
}

// Attach a rejection handler so an unexpected failure is logged and always
// produces a non-zero exit code, rather than depending on the runtime's
// default handling of unhandled promise rejections.
main().catch((error) => {
  logger.error('Broken link checker failed unexpectedly', error)
  process.exit(1)
})

scripts/utils/logger.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* eslint-disable no-console */
2+
const chalk = require('chalk')
3+
4+
// Current time as an ISO-8601 string; evaluated on every log call.
const isoTimestamp = () => new Date().toISOString()

/**
 * Console logger that prefixes every message with a coloured timestamp.
 *
 * - `log` writes to stdout with a yellow `[timestamp]:` prefix.
 * - `error` writes to stderr with a red-background `[timestamp]: ⛔️ ERROR:` prefix.
 *
 * Any further arguments are forwarded to the console method unchanged.
 */
module.exports.logger = {
  log(...args) {
    const prefix = chalk.yellow(`[${isoTimestamp()}]:`)

    console.log(`${prefix} `, ...args)
  },
  error(...args) {
    const prefix = chalk.bgRed(`[${isoTimestamp()}]: ⛔️ ERROR:`)

    console.error(`${prefix} `, ...args)
  },
}
scripts/utils/prettyPrintStatusCode.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
const chalk = require('chalk')
2+
3+
/**
 * Maps an HTTP status code to a coloured label for log output.
 *
 * @param {number} statusCode - status code of the checked link
 * @returns {string} a red-background `ERROR ⛔️` label when the code is 400 or
 *   above, otherwise a green `OK ✅` label
 */
module.exports.prettyPrintStatusCode = (statusCode) =>
  statusCode >= 400 ? chalk.bgRed(`ERROR ⛔️`) : chalk.green(`OK ✅`)

0 commit comments

Comments
 (0)