-
Notifications
You must be signed in to change notification settings - Fork 207
/
Copy pathlink-checker.js
123 lines (99 loc) · 3.56 KB
/
link-checker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import fs from 'fs'
import path from 'path'
// The directory to scan is the single required command-line argument;
// without it, print usage and exit with a failure status.
const [directory] = process.argv.slice(2)
if (directory === undefined) {
  console.log('Usage: node link-checker.js <directory>')
  process.exit(1)
}
/**
 * Recursively collects the paths of all markdown (`.md`) files below a directory.
 *
 * Directories that are themselves symbolic links are not descended into, and
 * entries whose target does not exist (e.g. dangling symlinks) are skipped.
 *
 * @param {string} directory - Root directory to search.
 * @returns {string[]} Path of every `.md` file found, each built by joining
 *   onto `directory`, in traversal order.
 * @throws {Error} Any file-system error other than a missing entry.
 */
function findAllMarkdownFiles(directory) {
  const files = []
  function findMarkdown(dir) {
    // Check for a symlink before listing, so a linked directory is never read.
    if (fs.lstatSync(dir).isSymbolicLink()) {
      return
    }
    for (const item of fs.readdirSync(dir)) {
      const itemPath = path.join(dir, item)
      let stat
      try {
        stat = fs.statSync(itemPath)
      } catch (err) {
        // statSync follows symlinks, so a dangling link reports ENOENT;
        // skip it instead of aborting the whole scan.
        if (err.code === 'ENOENT') {
          continue
        }
        throw err
      }
      if (stat.isDirectory()) {
        findMarkdown(itemPath)
      } else if (item.endsWith('.md')) {
        files.push(itemPath)
      }
    }
  }
  findMarkdown(directory)
  return files
}
// Entry point: scan every markdown file under `directory` and print one line
// to stdout for each link whose target is not an existing file, and for each
// direct link to https://lmql.ai/docs.
console.log(`Checking links in ${directory}...`)
const markdownFiles = findAllMarkdownFiles(directory)

// Inline markdown link or image `[text](target)`; capture group 1 is the target.
const markdownLinkPattern = /\[.*?\]\((.*?)\)/g
const htmlSuffix = '.html'

markdownFiles.forEach(file => {
  // if in docs/node_modules, ignore (decided once per file, before reading it)
  if (file.includes('docs/node_modules')) {
    return
  }

  const contents = fs.readFileSync(file, 'utf-8')
  let inGrammarCodeBlock = false

  contents.split(/\r?\n/).forEach((line, index) => {
    const lineno = index + 1

    if (line.includes("```grammar")) {
      inGrammarCodeBlock = true
    } else if (inGrammarCodeBlock && line.includes("```")) {
      // The next fence closes the grammar block, so scanning resumes after it.
      inGrammarCodeBlock = false
    }
    // do not scan grammar code block (custom page linking)
    if (inGrammarCodeBlock) {
      return
    }

    // Take the target from the capture group so that parentheses inside the
    // link text cannot be mistaken for the start of the target.
    for (const [, target] of line.matchAll(markdownLinkPattern)) {
      let url = target

      if (url.startsWith("#")) {
        // ignore internal links
        continue
      }
      if (url.startsWith("https://lmql.ai/docs")) {
        console.log(`File ${file}:${lineno} contains direct link to lmql.ai/docs: ${url}`)
        continue
      }
      if (url.startsWith("http") || url.startsWith("mailto")) {
        // ignore external links
        continue
      }

      // drop the #fragment, only the file part is checked
      if (url.includes('#')) {
        url = url.substring(0, url.indexOf('#'))
      }
      // links point at rendered .html pages; the source file is the .md
      // (only the suffix is rewritten, not an earlier '.html' in the path)
      if (url.endsWith(htmlSuffix)) {
        url = `${url.slice(0, -htmlSuffix.length)}.md`
      }
      // a bracket in the target means the pattern matched something that is
      // not a plain link; skip it
      if (url.includes("]")) {
        continue
      }

      // Targets starting with "/" are resolved against the scanned root,
      // everything else against the directory of the containing file.
      let basedir = path.dirname(file)
      if (url.startsWith("/")) {
        basedir = directory
        url = url.substring(1)
      }

      // check if url exists
      const targetPath = path.join(basedir, url)
      try {
        if (!fs.statSync(targetPath).isFile()) {
          console.log(`File ${file}:${lineno} contains invalid link: ${url}, not a file: ${targetPath}`)
        }
      } catch (e) {
        console.log(`File ${file}:${lineno} contains invalid link: ${url}. Could not find ${targetPath}`)
      }
    }
  })
})