Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 502ae34

Browse files
authored
Merge pull request #569 from janhq/chore/update-start-model-api
chore: update start model api
2 parents ba13676 + ab6237f commit 502ae34

File tree

13 files changed

+103
-73
lines changed

13 files changed

+103
-73
lines changed

cortex-js/constant.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ export const databaseFile = `${databaseName}.db`;
44

55
export const defaultCortexJsHost = 'localhost';
66
export const defaultCortexJsPort = 7331;
7+
8+
export const defaultCortexCppHost = '127.0.0.1';
9+
export const defaultCortexCppPort = 3928;
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
import { Model } from '../models/model.interface';
1+
import { Model, ModelSettingParams } from '../models/model.interface';
22
import { Extension } from './extension.abstract';
33

44
export abstract class EngineExtension extends Extension {
55
abstract provider: string;
66

77
abstract inference(completion: any, req: any, stream: any, res?: any): void;
88

9-
async loadModel(model: Model): Promise<void> {}
9+
async loadModel(
10+
model: Model,
11+
settingParams?: ModelSettingParams,
12+
): Promise<void> {}
1013

1114
async unloadModel(modelId: string): Promise<void> {}
1215
}

cortex-js/src/infrastructure/commanders/inference.command.ts

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,41 @@ import { ChatUsecases } from '@/usecases/chat/chat.usecases';
22
import { CommandRunner, SubCommand } from 'nest-commander';
33
import { CreateChatCompletionDto } from '../dtos/chat/create-chat-completion.dto';
44
import { ChatCompletionRole } from '@/domain/models/message.interface';
5-
import { stdout } from 'process';
5+
import { exit, stdin, stdout } from 'node:process';
66
import * as readline from 'node:readline/promises';
77
import { ChatStreamEvent } from '@/domain/abstracts/oai.abstract';
88
import { ChatCompletionMessage } from '../dtos/chat/chat-completion-message.dto';
99

1010
@SubCommand({ name: 'chat' })
1111
export class InferenceCommand extends CommandRunner {
12-
exitClause = 'exit()';
13-
userIndicator = '>> ';
14-
exitMessage = 'Bye!';
12+
private exitClause = 'exit()';
13+
private userIndicator = '>> ';
14+
private exitMessage = 'Bye!';
1515

1616
constructor(private readonly chatUsecases: ChatUsecases) {
1717
super();
1818
}
1919

20-
async run(): Promise<void> {
20+
async run(input: string[]): Promise<void> {
21+
if (input.length == 0) {
22+
console.error('Please provide a model id.');
23+
exit(1);
24+
}
25+
26+
const modelId = input[0];
2127
console.log(`Inorder to exit, type '${this.exitClause}'.`);
2228
const messages: ChatCompletionMessage[] = [];
2329

2430
const rl = readline.createInterface({
25-
input: process.stdin,
26-
output: process.stdout,
31+
input: stdin,
32+
output: stdout,
2733
prompt: this.userIndicator,
2834
});
2935
rl.prompt();
3036

3137
rl.on('close', () => {
3238
console.log(this.exitMessage);
33-
process.exit(0);
39+
exit(0);
3440
});
3541

3642
rl.on('line', (userInput: string) => {
@@ -46,7 +52,7 @@ export class InferenceCommand extends CommandRunner {
4652

4753
const chatDto: CreateChatCompletionDto = {
4854
messages,
49-
model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
55+
model: modelId,
5056
stream: true,
5157
max_tokens: 2048,
5258
stop: [],

cortex-js/src/infrastructure/commanders/init.command.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { resolve } from 'path';
44
import { HttpService } from '@nestjs/axios';
55
import { Presets, SingleBar } from 'cli-progress';
66
import decompress from 'decompress';
7+
import { exit } from 'node:process';
78

89
@SubCommand({ name: 'init', aliases: ['setup'] })
910
export class InitCommand extends CommandRunner {
@@ -41,7 +42,7 @@ export class InitCommand extends CommandRunner {
4142

4243
if (!res?.data) {
4344
console.log('Failed to fetch releases');
44-
process.exit(1);
45+
exit(1);
4546
}
4647

4748
let release = res?.data;
@@ -56,7 +57,7 @@ export class InitCommand extends CommandRunner {
5657

5758
if (!toDownloadAsset) {
5859
console.log(`Could not find engine file ${engineFileName}`);
59-
process.exit(1);
60+
exit(1);
6061
}
6162

6263
console.log(`Downloading engine file ${engineFileName}`);
@@ -107,9 +108,9 @@ export class InitCommand extends CommandRunner {
107108
);
108109
} catch (e) {
109110
console.log(e);
110-
process.exit(1);
111+
exit(1);
111112
}
112-
process.exit(0);
113+
exit(0);
113114
};
114115

115116
parseEngineFileName = (options: {

cortex-js/src/infrastructure/commanders/pull.command.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { CommandRunner, SubCommand } from 'nest-commander';
33
import { CreateModelDto } from '../dtos/models/create-model.dto';
44
import { ModelFormat } from '@/domain/models/model.interface';
55
import { Presets, SingleBar } from 'cli-progress';
6+
import { exit } from 'node:process';
67

78
const AllQuantizations = [
89
'Q3_K_S',
@@ -49,7 +50,7 @@ export class PullCommand extends CommandRunner {
4950
bar.update(progress);
5051
});
5152
console.log('\nDownload complete!');
52-
process.exit(0);
53+
exit(0);
5354
}
5455

5556
async pullHuggingFaceModel(modelId: string) {

cortex-js/src/infrastructure/commanders/start.command.ts

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
22
import { ModelsUsecases } from '@/usecases/models/models.usecases';
33
import { CommandRunner, SubCommand } from 'nest-commander';
4-
import { LoadModelDto } from '../dtos/models/load-model.dto';
54
import { resolve } from 'path';
65
import { existsSync } from 'fs';
6+
import { Model, ModelSettingParams } from '@/domain/models/model.interface';
7+
import { exit } from 'node:process';
78

89
@SubCommand({ name: 'start', aliases: ['run'] })
910
export class StartCommand extends CommandRunner {
@@ -15,29 +16,32 @@ export class StartCommand extends CommandRunner {
1516
}
1617

1718
async run(input: string[]): Promise<void> {
19+
if (input.length === 0) {
20+
console.error('Model ID is required');
21+
exit(1);
22+
}
1823
const modelId = input[0];
24+
const model = await this.getModelOrStop(modelId);
1925

20-
if (!modelId) {
21-
console.log('Model ID is required');
22-
return;
23-
}
2426
return this.startCortex()
25-
.then(() => this.startModel(modelId))
27+
.then(() => this.startModel(model.id))
2628
.then(console.log)
2729
.catch(console.error);
2830
}
2931

3032
private async startCortex() {
3133
if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) {
3234
console.log('Please init the cortex by running cortex init command!');
33-
process.exit(0);
35+
exit(0);
3436
}
3537
const host = '127.0.0.1';
36-
const port = '3928';
38+
const port = 3928;
3739
return this.cortexUsecases.startCortex(host, port);
3840
}
41+
3942
private async startModel(modelId: string) {
40-
const settings = {
43+
// TODO: NamH remove these hardcoded value
44+
const settings: ModelSettingParams = {
4145
cpu_threads: 10,
4246
ctx_len: 2048,
4347
embedding: false,
@@ -48,8 +52,16 @@ export class StartCommand extends CommandRunner {
4852
ai_prompt: '\n### Response:',
4953
ngl: 100,
5054
};
51-
const loadModelDto: LoadModelDto = { modelId, settings };
52-
return this.modelsUsecases.startModel(loadModelDto);
55+
return this.modelsUsecases.startModel(modelId, settings);
56+
}
57+
58+
private async getModelOrStop(modelId: string): Promise<Model> {
59+
const model = await this.modelsUsecases.findOne(modelId);
60+
if (!model) {
61+
console.debug('Model not found');
62+
exit(1);
63+
}
64+
return model;
5365
}
5466

5567
rootDir = () => resolve(__dirname, `../../../`);

cortex-js/src/infrastructure/controllers/models.controller.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ import { ModelsUsecases } from '@/usecases/models/models.usecases';
1212
import { CreateModelDto } from '@/infrastructure/dtos/models/create-model.dto';
1313
import { UpdateModelDto } from '@/infrastructure/dtos/models/update-model.dto';
1414
import { ApiResponse, ApiTags } from '@nestjs/swagger';
15-
import { LoadModelSuccessDto } from '@/infrastructure/dtos/models/load-model-success.dto';
16-
import { LoadModelDto } from '@/infrastructure/dtos/models/load-model.dto';
15+
import { StartModelSuccessDto } from '@/infrastructure/dtos/models/start-model-success.dto';
1716
import { DownloadModelDto } from '@/infrastructure/dtos/models/download-model.dto';
17+
import { ModelSettingParamsDto } from '../dtos/models/model-setting-params.dto';
1818

1919
@ApiTags('Models')
2020
@Controller('models')
@@ -30,11 +30,14 @@ export class ModelsController {
3030
@ApiResponse({
3131
status: 200,
3232
description: 'The model has been loaded successfully.',
33-
type: LoadModelSuccessDto,
33+
type: StartModelSuccessDto,
3434
})
35-
@Post('load')
36-
load(@Body() loadModelDto: LoadModelDto) {
37-
return this.modelsService.startModel(loadModelDto);
35+
@Post(':modelId/start')
36+
startModel(
37+
@Param('modelId') modelId: string,
38+
@Body() settings: ModelSettingParamsDto,
39+
) {
40+
return this.modelsService.startModel(modelId, settings);
3841
}
3942

4043
@Post('download')
Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
import { IsIP, IsString } from 'class-validator';
1+
import { IsIP, IsNumber, IsString, Max, Min } from 'class-validator';
22

33
export class StartCortexDto {
44
@IsString()
55
@IsIP()
66
host: string;
77

8-
@IsString()
9-
port: string;
8+
@IsNumber()
9+
@Min(0)
10+
@Max(65535)
11+
port: number;
1012
}

cortex-js/src/infrastructure/dtos/models/load-model.dto.ts

Lines changed: 0 additions & 11 deletions
This file was deleted.

cortex-js/src/infrastructure/dtos/models/load-model-success.dto.ts renamed to cortex-js/src/infrastructure/dtos/models/start-model-success.dto.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { IsString } from 'class-validator';
22

3-
export class LoadModelSuccessDto {
3+
export class StartModelSuccessDto {
44
@IsString()
55
message: string;
66

0 commit comments

Comments
 (0)