-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcovparser.js
More file actions
72 lines (67 loc) · 2.32 KB
/
covparser.js
File metadata and controls
72 lines (67 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
// parse City of Vancouver sources //
const cheerio = require('cheerio')
const request = require('request-promise-native');
/**
 * Fetch the City of Vancouver data catalogue page and extract its data sources.
 *
 * Every row of `.catalogTable tbody tr` except the first, and having more than
 * one cell, becomes one source. The source's tag is the text of the first
 * child element of the row's first cell. Every later cell that contains child
 * elements is scanned for links; each accepted link becomes a
 * `{ format, url }` entry.
 *
 * URLs are returned exactly as they appear in the page (they may be relative).
 *
 * @returns {Promise<Array<{tag: string, formats: Array<{format: string, url: string}>, src: string}>>}
 *   Resolves with the parsed sources. There is no error handling here, so a
 *   failed request or parse surfaces as a rejection of the returned promise.
 */
function getCovSources() {
  // Links to intermediate HTML pages (".htm"/".html") and FTP links are
  // skipped for now. The dot is escaped so only a literal ".htm" matches.
  const htmlPagePattern = /\.htm/;
  const ftpPattern = /^ftp/;

  return request
    .get({
      uri: "http://data.vancouver.ca/datacatalogue/index.htm",
      resolveWithFullResponse: true
    })
    .then((response) => {
      const $ = cheerio.load(response.body);

      const sources = $(".catalogTable tbody tr").map((rowIndex, row) => {
        const cells = $(row).children();

        // Skip the first row (presumably the table header) and rows that do
        // not have at least a label cell plus one more cell.
        if (rowIndex === 0 || cells.length <= 1) {
          return undefined;
        }

        const formats = cells
          // Cell 0 is the label; only later, non-empty cells can hold links.
          .filter((cellIndex, cell) => cellIndex > 0 && $(cell).children().length > 0)
          .map((cellIndex, cell) => {
            // Returning an array from a cheerio map callback inserts its
            // elements into the result, so the formats end up as a flat list.
            return $(cell).find('a').map((linkIndex, anchor) => {
              const href = $(anchor).attr('href');
              // An anchor without an href has nothing to download; emitting
              // it would hand callers an entry whose url is undefined.
              if (typeof href !== "string") {
                return undefined;
              }
              if (htmlPagePattern.test(href) || ftpPattern.test(href)) {
                return undefined;
              }

              // Prefer the span inside the link as the format label; fall
              // back to the text of the spans anywhere in the cell.
              let label = $(anchor).find('span').text();
              if (label === "") {
                label = $(cell).find('span').text();
              }
              return { format: label.toLowerCase(), url: href };
            }).toArray();
          })
          .toArray();

        return {
          tag: $(cells[0]).children().first().text(),
          formats: formats,
          src: "vancouver"
        };
      });

      // cheerio's map already ignores null/undefined results; the filter is
      // kept as a cheap safeguard so callers never see empty slots.
      return sources.toArray().filter((src) => src != null);
    });
}
/**
 * Fetch the Vancouver sources and keep those whose tag contains `tag`.
 *
 * Matching is a case-insensitive substring test. Sources without any format
 * are dropped. Format URLs that start with ".." are rewritten to be rooted at
 * "http://data.vancouver.ca"; the format objects are updated in place.
 *
 * @param {string} tag - Text to look for in each source's tag. An empty
 *   string, `null` or `undefined` selects nothing.
 * @returns {Promise<Array<{tag: string, formats: Array<{format: string, url: string}>, src: string}>>}
 *   Resolves with the matching sources (an empty array when `tag` is empty).
 *   Rejects if `getCovSources()` rejects.
 */
function getCovSourcesByTag(tag) {
  return getCovSources().then((srcs) => {
    if (tag == null || tag === "") {
      return [];
    }

    // Lowercase both sides: the source tag was already lowercased, so an
    // uppercase letter in the search tag could otherwise never match.
    const needle = String(tag).toLowerCase();
    const matches = srcs.filter((source) => {
      // very basic 'tag' filter
      return source.formats.length > 0 && source.tag.toLowerCase().includes(needle);
    });

    const parentDirPrefix = /^\.\./;
    // Loop variables are declared with const; assigning to undeclared names
    // would create globals (or throw a ReferenceError in strict mode).
    for (const source of matches) {
      for (const format of source.formats) {
        // Leave entries without a string url untouched instead of throwing.
        if (typeof format.url === "string") {
          format.url = format.url.replace(parentDirPrefix, "http://data.vancouver.ca");
        }
      }
    }
    return matches;
  });
}
module.exports = { getCovSources, getCovSourcesByTag };