Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit cdf9e04

Browse files
authored
feat: refactor cortex API with new model.yaml structure (#656)
1 parent bf665bf commit cdf9e04

35 files changed

+767
-837
lines changed

cortex-js/src/app.module.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { DatabaseModule } from './infrastructure/database/database.module';
77
import { ChatModule } from './usecases/chat/chat.module';
88
import { AssistantsModule } from './usecases/assistants/assistants.module';
99
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
10+
import { ModelRepositoryModule } from './infrastructure/repositories/model/model.module';
1011
import { CortexModule } from './usecases/cortex/cortex.module';
1112
import { ConfigModule } from '@nestjs/config';
1213
import { env } from 'node:process';
@@ -32,6 +33,7 @@ import { AppLoggerMiddleware } from './infrastructure/middlewares/app.logger.mid
3233
CortexModule,
3334
ExtensionModule,
3435
FileManagerModule,
36+
ModelRepositoryModule,
3537
],
3638
providers: [SeedService],
3739
})

cortex-js/src/domain/models/model.interface.ts

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,6 @@
1-
/**
2-
* Represents the information about a model.
3-
* @stored
4-
*/
5-
export interface ModelInfo {
6-
id: string;
7-
settings: ModelSettingParams;
8-
parameters: ModelRuntimeParams;
9-
engine?: string;
10-
}
11-
121
export interface ModelArtifact {
13-
url: string;
14-
}
15-
16-
export enum ModelFormat {
17-
GGUF = 'gguf',
18-
API = 'api',
2+
mmproj?: string;
3+
llama_model_path?: string;
194
}
205

216
/**
@@ -24,64 +9,91 @@ export enum ModelFormat {
249
*/
2510
export interface Model {
2611
/**
27-
* The type of the object.
28-
* Default: "model"
12+
* Model identifier.
2913
*/
30-
object: string;
14+
model: string;
3115

3216
/**
33-
* The version of the model.
17+
* GGUF metadata: general.name
3418
*/
35-
version: string;
19+
name?: string;
3620

3721
/**
38-
* The format of the model.
22+
* GGUF metadata: version
3923
*/
40-
format: ModelFormat;
24+
version?: string;
4125

4226
/**
4327
* The model download source. It can be an external url or a local filepath.
4428
*/
45-
sources: ModelArtifact[];
29+
files: string[] | ModelArtifact;
30+
31+
/**
32+
* GGUF metadata: tokenizer.chat_template
33+
*/
34+
prompt_template?: string;
35+
36+
/**
37+
* Defines specific tokens or phrases at which the model will stop generating further output.
38+
*/
39+
stop?: string[];
40+
41+
/// Inferencing
42+
/**
43+
* Set probability threshold for more relevant outputs.
44+
*/
45+
top_p?: number;
4646

4747
/**
48-
* The model identifier, which can be referenced in the API endpoints.
48+
* Controls the randomness of the model’s output.
4949
*/
50-
id: string;
50+
temperature?: number;
5151

5252
/**
53-
* Human-readable name that is used for UI.
53+
* Adjusts the likelihood of the model repeating words or phrases in its output.
5454
*/
55-
name: string;
55+
frequency_penalty?: number;
5656

5757
/**
58-
* The Unix timestamp (in seconds) for when the model was created
58+
* Influences the generation of new and varied concepts in the model’s output.
5959
*/
60-
created: number;
60+
presence_penalty?: number;
6161

62+
/// Engines
6263
/**
63-
* Default: "A cool model from Huggingface"
64+
* The context length for model operations varies; the maximum depends on the specific model used.
6465
*/
65-
description: string;
66+
ctx_len?: number;
6667

6768
/**
68-
* The model settings.
69+
* Enable real-time data processing for faster predictions.
6970
*/
70-
settings: ModelSettingParams;
71+
stream?: boolean;
72+
73+
/*
74+
* The maximum number of tokens the model will generate in a single response.
75+
*/
76+
max_tokens?: number;
7177

7278
/**
73-
* The model runtime parameters.
79+
* The number of layers to load onto the GPU for acceleration.
7480
*/
75-
parameters: ModelRuntimeParams;
81+
ngl?: number;
7682

7783
/**
78-
* Metadata of the model.
84+
* The number of parallel operations. Only set when enable continuous batching.
7985
*/
80-
metadata: ModelMetadata;
86+
n_parallel?: number;
87+
88+
/**
89+
* Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)
90+
*/
91+
cpu_threads?: number;
92+
8193
/**
8294
* The model engine.
8395
*/
84-
engine: string;
96+
engine?: string;
8597
}
8698

8799
export interface ModelMetadata {
@@ -109,6 +121,8 @@ export interface ModelSettingParams {
109121
cont_batching?: boolean;
110122
vision_model?: boolean;
111123
text_model?: boolean;
124+
engine?: string;
125+
stop?: string[];
112126
}
113127

114128
/**
@@ -133,8 +147,3 @@ export interface ModelRuntimeParams {
133147
export type ModelInitFailed = Model & {
134148
error: Error;
135149
};
136-
137-
export interface NitroModelSettings extends ModelSettingParams {
138-
llama_model_path: string;
139-
cpu_threads: number;
140-
}

cortex-js/src/domain/models/thread.interface.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { AssistantTool } from './assistant.interface';
2-
import { ModelInfo } from './model.interface';
2+
import { Model } from './model.interface';
33

44
export interface Thread {
55
/** Unique identifier for the thread, generated by default using the ULID method. **/
@@ -40,7 +40,7 @@ export interface ThreadMetadata {
4040
export interface ThreadAssistantInfo {
4141
assistant_id: string;
4242
assistant_name: string;
43-
model: ModelInfo;
43+
model: Partial<Model>;
4444
instructions?: string;
4545
tools?: AssistantTool[];
4646
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import { Model } from '../models/model.interface';
2+
import { Repository } from './repository.interface';
3+
4+
export abstract class ModelRepository extends Repository<Model> {}

cortex-js/src/file-manager/file-manager.service.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ export class FileManagerService {
1010
private configFile = '.cortexrc';
1111
private cortexDirectoryName = 'cortex';
1212
private modelFolderName = 'models';
13+
private extensionFoldername = 'extensions';
1314
private cortexCppFolderName = 'cortex-cpp';
1415

16+
/**
17+
* Get cortex configs
18+
* @returns the config object
19+
*/
1520
async getConfig(): Promise<Config> {
1621
const homeDir = os.homedir();
1722
const configPath = join(homeDir, this.configFile);
@@ -71,8 +76,33 @@ export class FileManagerService {
7176
};
7277
}
7378

79+
/**
80+
* Get the app data folder path
81+
* Usually it is located at the home directory > cortex
82+
* @returns the path to the data folder
83+
*/
7484
async getDataFolderPath(): Promise<string> {
7585
const config = await this.getConfig();
7686
return config.dataFolderPath;
7787
}
88+
89+
/**
90+
* Get the models data folder path
91+
* Usually it is located at the home directory > cortex > models
92+
* @returns the path to the models folder
93+
*/
94+
async getModelsPath(): Promise<string> {
95+
const dataFolderPath = await this.getDataFolderPath();
96+
return join(dataFolderPath, this.modelFolderName);
97+
}
98+
99+
/**
100+
* Get the extensions data folder path
101+
* Usually it is located at the home directory > cortex > extensions
102+
* @returns the path to the extensions folder
103+
*/
104+
async getExtensionsPath(): Promise<string> {
105+
const dataFolderPath = await this.getDataFolderPath();
106+
return join(dataFolderPath, this.extensionFoldername);
107+
}
78108
}

cortex-js/src/infrastructure/commanders/models/model-get.command.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ export class ModelGetCommand extends CommandRunner {
1414
exit(1);
1515
}
1616

17-
const models = await this.modelsCliUsecases.getModel(input[0]);
18-
console.log(models);
17+
const model = await this.modelsCliUsecases.getModel(input[0]);
18+
if (!model) console.error('Model not found');
19+
else console.log(model);
1920
}
2021
}

cortex-js/src/infrastructure/commanders/models/model-list.command.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@ export class ModelListCommand extends CommandRunner {
1515
option.format === 'table'
1616
? console.table(
1717
models.map((e) => ({
18-
id: e.id,
18+
id: e.model,
1919
engine: e.engine,
20-
format: e.format,
21-
created: e.created,
20+
version: e.version,
2221
})),
2322
)
2423
: console.log(models);

cortex-js/src/infrastructure/commanders/models/model-pull.command.ts

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { CommandRunner, InquirerService, SubCommand } from 'nest-commander';
22
import { exit } from 'node:process';
33
import { ModelsCliUsecases } from '../usecases/models.cli.usecases';
44
import { RepoDesignation, listFiles } from '@huggingface/hub';
5-
import { basename } from 'node:path';
5+
import { ModelNotFoundException } from '@/infrastructure/exception/model-not-found.exception';
66

77
@SubCommand({
88
name: 'pull',
@@ -29,12 +29,16 @@ export class ModelPullCommand extends CommandRunner {
2929
? undefined
3030
: await this.tryToGetBranches(input[0]);
3131

32-
if (!branches) {
33-
await this.modelsCliUsecases.pullModel(input[0]);
34-
} else {
35-
// if there's metadata.yaml file, we assumed it's a JanHQ model
36-
await this.handleJanHqModel(input[0], branches);
37-
}
32+
await this.modelsCliUsecases
33+
.pullModel(
34+
!branches ? input[0] : await this.handleJanHqModel(input[0], branches),
35+
)
36+
.catch((e: Error) => {
37+
if (e instanceof ModelNotFoundException)
38+
console.error('Model does not exist.');
39+
else console.error(e);
40+
exit(1);
41+
});
3842

3943
console.log('\nDownload complete!');
4044
exit(0);
@@ -83,10 +87,6 @@ export class ModelPullCommand extends CommandRunner {
8387
}
8488

8589
private async handleJanHqModel(repoName: string, branches: string[]) {
86-
const sanitizedRepoName = repoName.trim().startsWith(this.janHqModelPrefix)
87-
? repoName
88-
: `${this.janHqModelPrefix}/${repoName}`;
89-
9090
let selectedTag = branches[0];
9191

9292
if (branches.length > 1) {
@@ -98,30 +98,7 @@ export class ModelPullCommand extends CommandRunner {
9898
console.error("Can't find model revision.");
9999
exit(1);
100100
}
101-
102-
const repo: RepoDesignation = { type: 'model', name: sanitizedRepoName };
103-
let ggufUrl: string | undefined = undefined;
104-
let fileSize = 0;
105-
for await (const fileInfo of listFiles({
106-
repo: repo,
107-
revision: revision,
108-
})) {
109-
if (fileInfo.path.endsWith('.gguf')) {
110-
ggufUrl = `https://huggingface.co/${sanitizedRepoName}/resolve/${revision}/${fileInfo.path}`;
111-
fileSize = fileInfo.size;
112-
break;
113-
}
114-
}
115-
116-
if (!ggufUrl) {
117-
console.error("Can't find model file.");
118-
exit(1);
119-
}
120-
console.log('Downloading', basename(ggufUrl));
121-
await this.modelsCliUsecases.pullModelWithExactUrl(
122-
`${sanitizedRepoName}/${revision}`,
123-
ggufUrl,
124-
fileSize,
125-
);
101+
// Return parsed model Id
102+
return `${repoName}:${revision}`;
126103
}
127104
}

cortex-js/src/infrastructure/commanders/models/model-start.command.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,16 @@ export class ModelStartCommand extends CommandRunner {
3333
}
3434
}
3535

36+
const existingModel = await this.modelsCliUsecases.getModel(modelId);
37+
if (
38+
!existingModel ||
39+
!Array.isArray(existingModel.files) ||
40+
/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
41+
) {
42+
console.error('Model is not available. Please pull the model first.');
43+
process.exit(1);
44+
}
45+
3646
await this.cortexUsecases
3747
.startCortex(options.attach)
3848
.then(() => this.modelsCliUsecases.startModel(modelId, options.preset))
@@ -41,15 +51,19 @@ export class ModelStartCommand extends CommandRunner {
4151
}
4252

4353
modelInquiry = async () => {
44-
const models = await this.modelsCliUsecases.listAllModels();
54+
const models = (await this.modelsCliUsecases.listAllModels()).filter(
55+
(model) =>
56+
Array.isArray(model.files) &&
57+
!/^(http|https):\/\/[^/]+\/.*/.test(model.files[0]),
58+
);
4559
if (!models.length) throw 'No models found';
4660
const { model } = await this.inquirerService.inquirer.prompt({
4761
type: 'list',
4862
name: 'model',
4963
message: 'Select a model to start:',
5064
choices: models.map((e) => ({
5165
name: e.name,
52-
value: e.id,
66+
value: e.model,
5367
})),
5468
});
5569
return model;

0 commit comments

Comments (0)