Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f470f2b
init
MaxiLein Apr 15, 2025
5313738
Merge branch 'main' into (EvoFrame)-Matching
MaxiLein May 9, 2025
7ffca0d
Add initial competence page layout
MaxiLein May 13, 2025
d6d3838
Merge branch 'main' into (EvoFrame)-Matching
MaxiLein May 13, 2025
7a42089
Add competences to db
MaxiLein May 16, 2025
0902e04
Resolve merge conflict
MaxiLein May 16, 2025
300130d
pass competenes to container
MaxiLein May 16, 2025
5a27a3b
Merge branch 'main' into EvoFrame-Matching
MaxiLein May 16, 2025
a12e510
Rewrite competences in DB
MaxiLein May 19, 2025
9f4c310
Add first layout for SpaceCompetences
MaxiLein May 30, 2025
39a0f01
Merge branch 'main' into EvoFrame-Matching
MaxiLein May 30, 2025
db7dfeb
ran prettier
MaxiLein Jun 2, 2025
c656728
Add paths to ignore for competence matcher database and models
MaxiLein Jun 25, 2025
c980b19
Add section header for Matching Service in .gitignore
MaxiLein Jun 25, 2025
f7edd69
feat: Implement middleware for database selection and logging
MaxiLein Jun 25, 2025
5468886
feat: Add matching functionality and semantic splitting for competenc…
MaxiLein Jul 4, 2025
6e9fc86
feat: Enhance competence matcher with new configuration options, impr…
MaxiLein Jul 5, 2025
d939e8a
Refactor competence matcher: Enhance model loading, semantic splittin…
MaxiLein Jul 14, 2025
8f6c378
feat: Implement OpenAPI specification for Matching Server API with re…
MaxiLein Jul 15, 2025
d3e6ee8
Updated OpenAPI specification for Matching Server API and enhance dat…
MaxiLein Jul 20, 2025
6cf8e30
feat: Enhance matching functionality with new alignment classificatio…
MaxiLein Jul 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@ dataEval.json

# Ignore generated credentials from google-github-actions/auth
gha-creds-*.json

# Matching Service (models and dbs)
# Patterns are anchored with a leading "/" (relative to this .gitignore).
# A "./" prefix is not gitignore syntax and would match nothing.
/src/competence-matcher/src/db/dbs/
/src/competence-matcher/src/models/
Empty file added src/competence-matcher/.env
Empty file.
720 changes: 720 additions & 0 deletions src/competence-matcher/openAPI.json

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions src/competence-matcher/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"name": "competence-matcher",
"version": "0.1.0",
"description": "Matching microservice that allows defining and matching on data criteria",
"main": "dist/server.js",
"scripts": {
"dev": "ts-node-dev --respawn --transpile-only src/server.ts",
"build": "tsc",
"run-production": "node dist/server.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/PROCEED-Labs/proceed.git"
},
"keywords": [
"embedding",
"matching"
],
"author": "PROCEED Project",
"license": "MIT",
"bugs": {
"url": "https://github.com/PROCEED-Labs/proceed/issues"
},
"homepage": "https://github.com/PROCEED-Labs/proceed#readme",
"dependencies": {
"@huggingface/transformers": "^3.5.2",
"express": "^5.1.0",
"ollama": "^0.5.16",
"sqlite-vec": "^0.1.7-alpha.2"
},
"devDependencies": {
"@types/express": "^5.0.2",
"@types/node": "^22.15.30",
"ts-node-dev": "^2.0.0",
"typescript": "^5.8.3"
},
"engines": {
"node": ">=23.5.0"
}
}
19 changes: 19 additions & 0 deletions src/competence-matcher/src/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import * as os from 'node:os';

/**
 * Parse an integer from an environment value.
 *
 * @param raw Raw environment value; may be undefined or empty.
 * @param fallback Value used when `raw` is unset, empty, or does not parse to an integer.
 * @returns The parsed base-10 integer, or `fallback`.
 */
function readIntEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  // parseInt yields NaN for unset/empty/malformed input; never let NaN leak into the config.
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Default worker-thread count: one less than the number of logical CPUs
 * (one is left for the main thread), but never below 1 so that single-core
 * hosts still get a usable value.
 */
function defaultWorkerThreads(): number {
  return Math.max(1, os.cpus().length - 1);
}

/**
 * Runtime configuration of the service, read once from environment variables
 * at module load. Every entry falls back to a built-in default when the
 * corresponding variable is unset or empty; numeric entries also fall back
 * when the variable does not parse to an integer.
 */
export const config = {
  dbPath: process.env.DB_PATH || 'src/db/dbs/',
  embeddingModel: process.env.EMBEDDING_MODEL || 'onnx-community/Qwen3-Embedding-0.6B-ONNX',
  embeddingDim: readIntEnv(process.env.EMBEDDING_DIM, 1024),
  nliModel: process.env.NLI_MODEL || './src/models/roberta_mnli_onnx',
  modelCache: process.env.MODEL_CACHE || 'src/models/',
  // Boolean flags are enabled only by the exact string 'true'.
  useGPU: process.env.USE_GPU === 'true',
  port: readIntEnv(process.env.PORT, 8501),
  multipleDBs: process.env.MULTIPLE_DBS === 'true',
  ollamaPath: process.env.OLLAMA_PATH || 'http://localhost:11434',
  ollamaBatchSize: readIntEnv(process.env.OLLAMA_BATCH_SIZE, 5),
  splittingModel: process.env.SPLITTING_MODEL || 'llama3.2',
  reasonModel: process.env.REASON_MODEL || 'llama3.2',
  splittingSymbol: process.env.SPLITTING_SYMBOL || 'SPLITTING_SYMBOL',
  // An explicitly configured NUMBER_OF_THREADS is taken as-is; only the default is clamped.
  maxWorkerThreads: readIntEnv(process.env.NUMBER_OF_THREADS, defaultWorkerThreads()),
  // MAX_JOB_TIME is given in seconds; stored in milliseconds.
  maxJobTime: readIntEnv(process.env.MAX_JOB_TIME, 600) * 1_000,
};
162 changes: 162 additions & 0 deletions src/competence-matcher/src/db/db-manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import * as path from 'node:path';
import * as fs from 'node:fs';
import VectorDataBase from './db';
import { config } from '../config';

// Storage directory (as configured, possibly relative) and embedding dimension from the shared config.
const rawDbPath = config.dbPath;
const embeddingDim = config.embeddingDim;

/**
 * DBManager: Singleton that manages multiple VectorDataBase instances keyed by
 * their normalised filename (`<name>.db`).
 *
 * All database files live in a single storage directory (resolved from the
 * configured db path). Instances are created lazily on first request and
 * cached until explicitly closed. One instance may additionally be marked as
 * the "active" database; that marker is static, i.e. shared process-wide.
 */
class DBManager {
  private static managerInstance: DBManager;
  private static activeDB: VectorDataBase | null = null;
  /** Cache of open databases, keyed by normalised filename (always ends with `.db`). */
  private readonly dbInstances = new Map<string, VectorDataBase>();
  /** Absolute path of the directory holding all database files. */
  private readonly dbPath: string;
  private readonly embeddingDim: number;

  private constructor() {
    this.embeddingDim = embeddingDim;

    // Resolve absolute path for storage directory
    this.dbPath = path.resolve(rawDbPath);
    // Ensure directory exists
    if (!fs.existsSync(this.dbPath)) {
      fs.mkdirSync(this.dbPath, { recursive: true });
    }
    // Load existing databases
    this.loadSavedDBs();
  }

  /**
   * Retrieve the singleton DBManager instance, initialising it if necessary.
   * @returns DBManager singleton
   */
  public static getInstance(): DBManager {
    if (!DBManager.managerInstance) {
      DBManager.managerInstance = new DBManager();
    }
    return DBManager.managerInstance;
  }

  /**
   * Open every `*.db` file found in the storage directory and cache it.
   *
   * The on-disk filename is registered verbatim. It must NOT be passed through
   * normaliseDBName after stripping `.db`: a file such as `foo.example.db`
   * would lose its second dotted segment and be reopened as `foo.db`, i.e. a
   * different database.
   */
  private loadSavedDBs(): void {
    for (const file of fs.readdirSync(this.dbPath)) {
      if (file.endsWith('.db')) {
        this.addDBInstance(file);
      }
    }
  }

  /**
   * Normalise a database name by dropping any directory part, stripping the
   * last extension and enforcing `.db`.
   * @param dbName Name provided; may include extension.
   * @returns Normalised filename ending with `.db`.
   */
  private normaliseDBName(dbName: string): string {
    const base = path.basename(dbName, path.extname(dbName));
    return `${base}.db`;
  }

  /**
   * Internal: create and cache a new VectorDataBase instance.
   *
   * The name is used as-is, both as cache key and as filename inside the
   * storage directory, so that key and file can never diverge. Callers are
   * responsible for normalising user-supplied names first.
   * @param normalisedDBName Already-normalised filename (ends with `.db`).
   * @returns Newly created VectorDataBase instance.
   */
  private addDBInstance(normalisedDBName: string): VectorDataBase {
    const filePath = path.join(this.dbPath, normalisedDBName);
    const db = new VectorDataBase({ filePath, embeddingDim: this.embeddingDim });
    this.dbInstances.set(normalisedDBName, db);
    return db;
  }

  /**
   * Get the currently active VectorDataBase instance, if set via setActiveDB.
   * @returns Active VectorDataBase or null if none is set.
   */
  public static getActiveDB(): VectorDataBase | null {
    return DBManager.activeDB;
  }

  /**
   * Set the active database by name. Creates the instance if it does not exist.
   * @param dbName Name of the database (without extension or with any extension).
   */
  public static setActiveDB(dbName: string): void {
    DBManager.activeDB = DBManager.getInstance().getDB(dbName);
  }

  /**
   * Retrieve (or create) the VectorDataBase instance for given name.
   * @param dbName Name of the database (without extension or with any extension).
   * @returns VectorDataBase instance corresponding to the name.
   */
  public getDB(dbName: string): VectorDataBase {
    const normalisedDBName = this.normaliseDBName(dbName);
    return this.dbInstances.get(normalisedDBName) ?? this.addDBInstance(normalisedDBName);
  }

  /**
   * Close and remove the VectorDataBase instance for given name.
   * If that instance is the active database, the active marker is cleared.
   * @param dbName Name of the database to close.
   * @returns True if instance existed and was closed; false otherwise.
   */
  public closeDB(dbName: string): boolean {
    const normalisedDBName = this.normaliseDBName(dbName);
    const db = this.dbInstances.get(normalisedDBName);
    if (!db) {
      return false;
    }
    db.close();
    this.dbInstances.delete(normalisedDBName);
    if (DBManager.activeDB === db) {
      DBManager.activeDB = null;
    }
    return true;
  }

  /**
   * Close and remove all managed VectorDataBase instances and clear the
   * active database marker.
   */
  public closeAllDBs(): void {
    this.dbInstances.forEach((db) => db.close());
    this.dbInstances.clear();
    DBManager.activeDB = null;
  }

  /**
   * List the names (normalised) of all managed databases.
   * @returns Array of database filenames (e.g. ['tenant1.db', 'other.db']).
   */
  public listDBs(): string[] {
    return Array.from(this.dbInstances.keys());
  }
}

export default DBManager;
Loading