|
| 1 | +const client = require('prom-client'); |
| 2 | +const yaml = require('yamljs'); |
| 3 | +const geoip = require('geoip-lite'); |
| 4 | +const rawSpec = yaml.load(`${__dirname}/../../docs/specification/description.yml`); |
| 5 | + |
| 6 | +// --------------------------------------------------------------------------- |
| 7 | +// Registry & default metrics |
| 8 | +// --------------------------------------------------------------------------- |
| 9 | + |
// Registry that every metric declared in this module is attached to; it is the
// registry serialised by the metrics endpoint.
const register = new client.Registry();
client.collectDefaultMetrics({ register });

// ---------------------------------------------------------------------------
// Custom HTTP metrics
// ---------------------------------------------------------------------------

// Labels shared by the per-request counter and histogram: the three core HTTP
// labels plus the path parameters that may be attached to a request.
const HTTP_LABEL_NAMES = [
  'method',
  'route',
  'status_code',
  'projectAccessionOrID',
  'UniProtID',
  'PubChemID',
  'PDBID',
  'filename',
  'analysisName',
];

// Counts finished HTTP requests.
const httpRequestsTotal = new client.Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: [...HTTP_LABEL_NAMES],
  registers: [register],
});

// Request latency. Observations are recorded in seconds, so sub-second
// boundaries are needed to resolve ordinary request latencies. The original
// 1/50/100/500 boundaries are kept so existing `le` series stay available.
const httpRequestDuration = new client.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: [...HTTP_LABEL_NAMES],
  buckets: [0.001, 0.05, 0.1, 0.5, 1, 50, 100, 500],
  registers: [register],
});

// Counts finished HTTP requests by the geographic location of the client.
const httpGeoRequestsTotal = new client.Counter({
  name: 'http_geo_requests_total',
  help: 'Total number of HTTP requests by geographic location',
  labelNames: ['country', 'region', 'city'],
  registers: [register],
});
| 38 | + |
| 39 | +// --------------------------------------------------------------------------- |
| 40 | +// Path normalizer built from the OpenAPI spec |
| 41 | +// --------------------------------------------------------------------------- |
| 42 | + |
/**
 * Compiles every path of the OpenAPI spec (`rawSpec.paths`) into a matcher.
 *
 * A spec path such as /projects/{id}/files/{file} becomes an anchored RegExp
 * in which each placeholder is a capture group matching one path segment,
 * together with the ordered list of placeholder names, so real URLs can be
 * mapped back to their template.
 *
 * @param {string[]} basePaths - API base prefixes; returned unchanged alongside
 *   the compiled matchers.
 * @returns {{compiled: Array<{specPath: string, paramNames: string[], paramCount: number, re: RegExp}>, basePaths: string[]}}
 *   Matchers sorted so that paths with fewer placeholders (more specific) are
 *   tried first.
 */
function buildMatchers(basePaths) {
  const specPaths = Object.keys((rawSpec && rawSpec.paths) || {});

  // Escape every regex metacharacter in the static parts of a path, not only
  // dots, so characters like "+" or "(" are matched literally.
  const escapeLiteral = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Pre-compile each spec path once
  const compiled = specPaths.map(specPath => {
    const paramNames = [];
    // Splitting on a pattern with a capture group interleaves the results:
    // even indices are static text, odd indices are placeholder names.
    const regexSource = specPath
      .split(/\{([^}]+)\}/)
      .map((part, index) => {
        if (index % 2 === 1) {
          paramNames.push(part);
          return '([^/]+)';
        }
        return escapeLiteral(part);
      })
      .join('');

    return {
      specPath,
      paramNames,
      paramCount: paramNames.length,
      re: new RegExp(`^${regexSource}$`),
    };
  });

  // Sort: fewer placeholders = more specific = tried first
  compiled.sort((a, b) => a.paramCount - b.paramCount);

  return { compiled, basePaths };
}
| 72 | + |
/**
 * Maps a concrete request path back to its OpenAPI template.
 *
 * @param {string} urlPath - Request path without the query string.
 * @param {{compiled: Array<{specPath: string, paramNames: string[], re: RegExp}>, basePaths: string[]}} matchers
 *   Matchers as produced by buildMatchers.
 * @returns {{route: string, params: Object<string, string>}} The matched
 *   template (prefixed with the base path that was stripped) and the captured
 *   placeholder values. When no template matches, a sanitised copy of
 *   `urlPath` with empty `params`.
 */
function normalizePath(urlPath, matchers) {
  // Strip the API base prefix so we can match raw spec paths
  let stripped = urlPath;
  let appliedBase = '';
  for (const base of matchers.basePaths) {
    if (urlPath.startsWith(base)) {
      stripped = urlPath.slice(base.length) || '/';
      appliedBase = base;
      break;
    }
  }

  for (const { specPath, re, paramNames } of matchers.compiled) {
    const match = re.exec(stripped);
    if (!match) continue;

    const params = {};
    for (let i = 0; i < paramNames.length; i += 1) {
      params[paramNames[i]] = match[i + 1];
    }
    return { route: appliedBase + specPath, params };
  }

  // No spec match — return a sanitised version to avoid high-cardinality labels
  // (replace values that look like IDs / filenames with a placeholder).
  // The segment terminator is a lookahead rather than part of the match, so the
  // "/" that ends one segment is still available to start the next one and
  // adjacent ID segments are all replaced.
  const route = urlPath
    .replace(/\/[a-fA-F0-9]{24}(?=\/|$)/g, '/{id}') // MongoDB ObjectIds
    .replace(/\/[A-Z0-9]+\.[0-9]+(?=\/|$)/g, '/{accession}'); // accessions like A01X6.1

  return { route, params: {} };
}
| 104 | + |
| 105 | +// --------------------------------------------------------------------------- |
| 106 | +// Public API |
| 107 | +// --------------------------------------------------------------------------- |
| 108 | + |
// Reduces an IPv4 address (plain, or IPv4-mapped IPv6 such as "::ffff:a.b.c.d")
// to its first two octets. Any other input, including native IPv6 and the empty
// string, yields 'Unknown' so that a full address is never retained.
function anonymizeIp(ip) {
  const ipv4 = /^(?:::ffff:)?(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/i.exec(ip);
  return ipv4 ? `${ipv4[1]}.${ipv4[2]}.0.0` : 'Unknown';
}

// Keeps only the route params whose names appear in `declaredNames`.
function pickDeclaredLabels(params, declaredNames) {
  const picked = {};
  for (const name of Object.keys(params)) {
    if (declaredNames.includes(name)) {
      picked[name] = params[name];
    }
  }
  return picked;
}

/**
 * Returns an Express middleware that records metrics for every response.
 *
 * The OpenAPI spec is not passed in; the path matchers are built by
 * buildMatchers from the spec loaded at module level.
 *
 * @param {string[]} [basePaths] - Base paths used by the router. A non-array
 *   value falls back to the default list.
 * @param {boolean} [debug=true] - When true, logs the client IP, geo lookup and
 *   computed labels to the console.
 * @returns {function(req, res, next): void} Middleware that sets
 *   `req.geoStats` and records the request once the response has finished.
 */
function metricsMiddleware(basePaths = ['/rest/current', '/rest/v1'], debug = true) {
  const bases = Array.isArray(basePaths) ? basePaths : ['/rest/current', '/rest/v1'];
  const matchers = buildMatchers(bases);

  // prom-client rejects label names that were not declared on the metric, so
  // route params are filtered against the counter's declared labels.
  // NOTE(review): relies on the metric instance exposing its configured
  // `labelNames`; when it does not, params are passed through unfiltered and
  // the try/catch below is the safety net.
  const declaredLabels = Array.isArray(httpRequestsTotal.labelNames)
    ? httpRequestsTotal.labelNames
    : null;

  return function trackMetrics(req, res, next) {
    const startMs = Date.now();

    // IP and Geolocation logic
    const ip = req.ip || (req.connection && req.connection.remoteAddress) || '';
    if (debug) console.log('ip', ip);
    const geo = geoip.lookup(ip);
    if (debug) console.log('geo', geo);

    req.geoStats = {
      country: geo ? geo.country : 'Unknown',
      region: geo ? geo.region : 'Unknown',
      city: geo ? geo.city : 'Unknown',
      // Anonymize IP by keeping only the first two octets (e.g. 192.168.x.x)
      // This way we can still get some geographic info without needing consent
      anonIp: anonymizeIp(ip),
    };
    if (debug) console.log('ip', req.geoStats);

    // Capture the full path NOW — req.path is mutated by Express after sub-router
    // dispatch, but req.originalUrl is always the original unmodified path.
    const fullPath = req.originalUrl.split('?')[0];
    const isFaviconRequest = fullPath.includes('favicon');
    const print = debug && !isFaviconRequest;
    if (print) console.log(`Received request: ${req.method} ${fullPath}, path ${req.path}, url ${req.url}`);

    res.on('finish', () => {
      // An exception thrown from an event listener would surface as an
      // uncaught exception; a metrics failure must never take the server down.
      try {
        const { route, params } = normalizePath(fullPath, matchers);
        if (print) console.log(`Normalized request: ${route}`);
        const labelParams = declaredLabels ? pickDeclaredLabels(params, declaredLabels) : params;
        const labels = {
          // Params go first so a param named like a core label cannot
          // overwrite method / route / status_code.
          ...labelParams,
          // Disabled until we see if we can get real IPs under local network
          // ip: req.geoStats.anonIp,
          method: req.method,
          route,
          status_code: String(res.statusCode),
        };
        if (print) console.log('labels', labels);
        httpRequestsTotal.inc(labels);
        httpRequestDuration.observe(labels, (Date.now() - startMs) / 1000);
        httpGeoRequestsTotal.inc({
          country: req.geoStats.country,
          region: req.geoStats.region,
          city: req.geoStats.city,
        });
      } catch (err) {
        console.error('Failed to record request metrics', err);
      }
    });

    next();
  };
}
| 167 | + |
/**
 * Express route handler that serves the Prometheus text exposition format.
 *
 * If collecting the metrics fails, the error is logged and a 500 response is
 * sent; without this the rejection of the async handler would go unhandled
 * and the request would never be answered.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response the metrics are written to.
 * @returns {Promise<void>}
 */
async function metricsEndpoint(req, res) {
  try {
    // Collect first so the Prometheus content type is only set on success.
    const body = await register.metrics();
    res.setHeader('Content-Type', register.contentType);
    res.end(body);
  } catch (err) {
    console.error('Failed to collect Prometheus metrics', err);
    res.statusCode = 500;
    res.setHeader('Content-Type', 'text/plain; charset=utf-8');
    res.end('Failed to collect metrics');
  }
}
| 173 | + |
// Public surface of this module: the request-tracking middleware factory and
// the handler that serves the collected metrics.
module.exports = { metricsMiddleware, metricsEndpoint };
0 commit comments