-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
90 lines (77 loc) · 2.27 KB
/
index.js
File metadata and controls
90 lines (77 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
const prune = require("json-prune");
/** Lower-cased names of HTML void elements: they never have content or a closing tag. */
const VOID_ELEMENTS = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "param",
  "source",
  "track",
  "wbr",
]);

/**
 * Parses the value of a `style` attribute into a property -> value map.
 *
 * @param {string} styleText - Raw attribute value, e.g. `"color: red; margin: 0"`.
 * @returns {Object<string, string>|null} The declarations found, or `null`
 *   when the value contains no complete `property: value` pair.
 */
function parseStyleDeclarations(styleText) {
  const styles = {};
  let hasDeclaration = false;
  styleText.split(";").forEach((declaration) => {
    // Split at the FIRST colon only, so values that themselves contain a
    // colon (e.g. `url(http://...)`) are kept intact.
    const colonIndex = declaration.indexOf(":");
    if (colonIndex === -1) return;
    const property = declaration.slice(0, colonIndex).trim();
    const value = declaration.slice(colonIndex + 1).trim();
    if (property && value) {
      styles[property] = value;
      hasDeclaration = true;
    }
  });
  return hasDeclaration ? styles : null;
}

/**
 * Copies the quoted attributes found in `attributeText` onto `element`.
 * Double- and single-quoted values are recognised, including empty ones;
 * unquoted and value-less (boolean) attributes are ignored. The `style`
 * attribute is stored as an object of declarations instead of a string.
 *
 * @param {Object} element - Element node to receive the attributes.
 * @param {string} attributeText - Tag source following the tag name.
 * @returns {void}
 */
function applyAttributes(element, attributeText) {
  // A fresh literal per call: a /g regex carries `lastIndex` state.
  const attributeRegex = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let attrMatch;
  while ((attrMatch = attributeRegex.exec(attributeText)) !== null) {
    const name = attrMatch[1];
    const value = attrMatch[2] !== undefined ? attrMatch[2] : attrMatch[3];
    if (name === "style") {
      const styles = parseStyleDeclarations(value);
      if (styles) element.style = styles;
    } else {
      element[name] = value;
    }
  }
}

/**
 * Parses an HTML string into a plain-object tree using a lightweight,
 * regex-based tokenizer (not a spec-compliant HTML parser).
 *
 * Each element is an object of the form
 * `{ tag, ...attributes, style?, children?, text? }`:
 *   - `tag`      - tag name as written in the source;
 *   - attributes - one string property per quoted attribute;
 *   - `style`    - object of inline style declarations (only when present);
 *   - `children` - array of child elements (only when there are any);
 *   - `text`     - the element's direct text segments, each trimmed and
 *                  joined with a single space (only when non-empty).
 *
 * Comments, `<!...>` declarations and `<meta>` tags are left out of the tree.
 * Void elements and self-closing tags (`<x />`) never become parents.
 * A closing tag closes the nearest open element with the same name
 * (case-insensitive); a closing tag with no matching open element is ignored.
 *
 * Known limitations: implicit end tags (e.g. consecutive `<li>` without
 * `</li>`) are not inferred, and attributes share one namespace with the
 * structural keys `tag`, `children` and `text`.
 *
 * @param {string} html - HTML source text.
 * @returns {Object|null} The first top-level element, or `null` when the
 *   input contains no element.
 */
function parseHTML(html) {
  // Alternatives, in order: comment | declaration such as <!DOCTYPE> | tag.
  // Only the tag alternative fills the capture groups (slash, tag body).
  const tokenRegex = /<!--[\s\S]*?-->|<![^>]*>|<(\/?)([^\s\/>][^>]*)>/g;

  const root = {};
  // Currently open elements, innermost last. Names are kept separately from
  // the nodes so an attribute called `tag` cannot confuse close-tag matching.
  const openElements = [{ node: root, name: null }];
  let cursor = 0; // index just past the previously consumed token

  // Attaches a text segment to the innermost open element.
  const appendText = (rawText) => {
    const text = rawText.trim();
    const owner = openElements[openElements.length - 1].node;
    if (!text || owner === root) return;
    owner.text = owner.text ? `${owner.text} ${text}` : text;
  };

  let match;
  while ((match = tokenRegex.exec(html)) !== null) {
    appendText(html.slice(cursor, match.index));
    cursor = tokenRegex.lastIndex;

    const tagBody = match[2];
    if (tagBody === undefined) continue; // comment or declaration

    // tagBody always starts with a non-space, non-slash character,
    // so this match cannot fail.
    const tagName = /^[^\s\/]+/.exec(tagBody)[0];
    const lowerName = tagName.toLowerCase();
    if (lowerName === "meta") continue; // deliberately omitted from the tree

    if (match[1] === "/") {
      // Close the nearest matching open element; index 0 is the synthetic
      // root and is never popped.
      for (let i = openElements.length - 1; i > 0; i -= 1) {
        if (openElements[i].name === lowerName) {
          openElements.length = i;
          break;
        }
      }
      continue;
    }

    const element = { tag: tagName };
    applyAttributes(element, tagBody.slice(tagName.length));

    const parent = openElements[openElements.length - 1].node;
    if (parent.children) parent.children.push(element);
    else parent.children = [element];

    const isSelfClosing = /\/\s*$/.test(tagBody);
    if (!isSelfClosing && !VOID_ELEMENTS.has(lowerName)) {
      openElements.push({ node: element, name: lowerName });
    }
  }
  // Text after the last tag (e.g. an unclosed final element).
  appendText(html.slice(cursor));

  return root.children ? root.children[0] : null;
}
/**
 * Command-line entry point.
 *
 * When no global `window` exists, reads the file named by the first CLI
 * argument (`process.argv[2]`) as UTF-8, parses it with `parseHTML`, and
 * prints the resulting tree to stdout as JSON. If printing the
 * `JSON.stringify` output throws, the output of `prune` (json-prune) is
 * printed instead. Exits with status 1 when the argument is missing or the
 * file cannot be read. Does nothing when `window` is defined.
 *
 * @returns {void}
 */
function main() {
  // Guard clause: a defined `window` means there is no CLI work to do.
  if (typeof window !== "undefined") {
    return;
  }

  const fs = require("fs");
  const [, , fileName] = process.argv;

  if (!fileName) {
    console.error("Please provide an HTML file as an argument.");
    process.exit(1);
  }

  // Invoked by fs.readFile once the file has been read (or has failed).
  const printParsedTree = (readError, contents) => {
    if (readError) {
      console.error(`Error reading the file: ${readError}`);
      process.exit(1);
    }

    const tree = parseHTML(contents);
    try {
      console.log(JSON.stringify(tree));
    } catch (serializationError) {
      // Fall back to json-prune when the plain JSON path throws.
      console.log(prune(tree));
    }
  };

  fs.readFile(fileName, "utf8", printParsedTree);
}
// When no global `window` exists, export `main` as the module itself and
// expose `parseHTML` as a property of it.
if (typeof window === "undefined") {
  module.exports = Object.assign(main, { parseHTML });
}