fix: cli commands (#556)

louis-jan · web-flow · commit 62f8ddbc6b85 · 2024-05-13T14:30:15.000+07:00
diff --git a/cortex-js/package.json b/cortex-js/package.json
@@ -41,8 +41,6 @@
     "node-fetch": "2",
     "readline": "^1.3.0",
     "reflect-metadata": "^0.2.0",
-    "request": "^2.88.2",
-    "request-progress": "^3.0.0",
     "rxjs": "^7.8.1",
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
diff --git a/cortex-js/src/command.module.ts b/cortex-js/src/command.module.ts
@@ -7,6 +7,8 @@ import { CortexModule } from './usecases/cortex/cortex.module';
 import { ServeCommand } from './infrastructure/commanders/serve.command';
 import { PullCommand } from './infrastructure/commanders/pull.command';
 import { InferenceCommand } from './infrastructure/commanders/inference.command';
+import { ModelsCommand } from './infrastructure/commanders/models.command';
+import { StartCommand } from './infrastructure/commanders/start.command';
 
 @Module({
   imports: [
@@ -19,6 +21,13 @@ import { InferenceCommand } from './infrastructure/commanders/inference.command'
     ModelsModule,
     CortexModule,
   ],
-  providers: [BasicCommand, PullCommand, ServeCommand, InferenceCommand],
+  providers: [
+    BasicCommand,
+    ModelsCommand,
+    PullCommand,
+    ServeCommand,
+    InferenceCommand,
+    StartCommand,
+  ],
 })
 export class CommandModule {}
diff --git a/cortex-js/src/domain/abstracts/engine.abstract.ts b/cortex-js/src/domain/abstracts/engine.abstract.ts
@@ -4,4 +4,5 @@ export abstract class EngineExtension extends Extension {
   abstract provider: string;
   abstract inference(completion: any, req: any, res: any): void;
   abstract loadModel(loadModel: any): Promise<void>;
+  abstract unloadModel(modelId: string): Promise<void>;
 }
diff --git a/cortex-js/src/domain/abstracts/oai.abstract.ts b/cortex-js/src/domain/abstracts/oai.abstract.ts
@@ -1,3 +1,4 @@
+/* eslint-disable @typescript-eslint/no-unused-vars */
 import { EngineExtension } from './engine.abstract';
 
 export abstract class OAIEngineExtension extends EngineExtension {
@@ -32,6 +33,6 @@ export abstract class OAIEngineExtension extends EngineExtension {
     response.body.pipe(res);
   }
 
-  // eslint-disable-next-line @typescript-eslint/no-unused-vars
   async loadModel(loadModel: any): Promise<void> {}
+  async unloadModel(modelId: string): Promise<void> {}
 }
diff --git a/cortex-js/src/infrastructure/commanders/basic-command.commander.ts b/cortex-js/src/infrastructure/commanders/basic-command.commander.ts
@@ -1,67 +1,52 @@
-import { RootCommand, CommandRunner } from 'nest-commander';
-import { ModelsUsecases } from 'src/usecases/models/models.usecases';
-import { LoadModelDto } from '../dtos/models/load-model.dto';
-import { CortexUsecases } from 'src/usecases/cortex/cortex.usecases';
+import { RootCommand, CommandRunner, Option } from 'nest-commander';
 import { PullCommand } from './pull.command';
 import { ServeCommand } from './serve.command';
 import { InferenceCommand } from './inference.command';
+import { ModelsCommand } from './models.command';
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
 
 @RootCommand({
-  subCommands: [PullCommand, ServeCommand, InferenceCommand],
+  subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand],
 })
 export class BasicCommand extends CommandRunner {
-  constructor(
-    private readonly modelsUsecases: ModelsUsecases,
-    private readonly cortexUsecases: CortexUsecases,
-  ) {
+  constructor(private readonly cortexUsecases: CortexUsecases) {
     super();
   }
 
-  async run(input: string[]): Promise<void> {
+  async run(input: string[], options?: any): Promise<void> {
     const command = input[0];
 
     switch (command) {
-      case 'models':
-        this.modelsUsecases.findAll().then((e: any) => console.log(e));
-        return;
-
       case 'start':
-        return this.startCortex();
-
-      case 'load':
-        return this.loadModel(input);
-
+        const host = options?.host || defaultCortexJsHost;
+        const port = options?.port || defaultCortexJsPort;
+        return this.cortexUsecases
+          .startCortex(host, port)
+          .then((e) => console.log(e));
+      case 'stop':
+        return this.cortexUsecases
+          .stopCortex(defaultCortexJsHost, defaultCortexJsPort)
+          .then((e) => console.log(e));
       default:
         console.error(`Command ${command} is not supported`);
         return;
     }
   }
 
-  private async startCortex(): Promise<void> {
-    const host = '127.0.0.1';
-    const port = '3928';
-    const result = await this.cortexUsecases.startCortex(host, port);
-    console.log(result);
+  @Option({
+    flags: '--host <host>',
+    description: 'Host to serve the application',
+  })
+  parseHost(value: string) {
+    return value;
   }
 
-  private async loadModel(input: string[]): Promise<void> {
-    if (input.length < 2) {
-      return Promise.reject('Model ID is required');
-    }
-    const settings = {
-      cpu_threads: 10,
-      ctx_len: 2048,
-      embedding: false,
-      prompt_template:
-        '{system_message}\n### Instruction: {prompt}\n### Response:',
-      system_prompt: '',
-      user_prompt: '\n### Instruction: ',
-      ai_prompt: '\n### Response:',
-      ngl: 100,
-    };
-    const loadModelDto: LoadModelDto = { modelId: input[1], settings };
-    await this.modelsUsecases
-      .loadModel(loadModelDto)
-      .then((e) => console.log(e));
+  @Option({
+    flags: '--port <port>',
+    description: 'Port to serve the application',
+  })
+  parsePort(value: string) {
+    return parseInt(value, 10);
   }
 }
diff --git a/cortex-js/src/infrastructure/commanders/inference.command.ts b/cortex-js/src/infrastructure/commanders/inference.command.ts
@@ -1,7 +1,6 @@
 import { CommandRunner, SubCommand } from 'nest-commander';
 
-// TODO: might need to change the name of the command. Inference seems a bit hard to type lol
-@SubCommand({ name: 'inference' })
+@SubCommand({ name: 'chat' })
 export class InferenceCommand extends CommandRunner {
   constructor() {
     super();
diff --git a/cortex-js/src/infrastructure/commanders/models.command.ts b/cortex-js/src/infrastructure/commanders/models.command.ts
@@ -0,0 +1,52 @@
+import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { PullCommand } from './pull.command';
+import { StartCommand } from './start.command';
+
+@SubCommand({ name: 'models', subCommands: [PullCommand, StartCommand] })
+export class ModelsCommand extends CommandRunner {
+  constructor(private readonly modelsUsecases: ModelsUsecases) {
+    super();
+  }
+
+  async run(input: string[]): Promise<void> {
+    const command = input[0];
+    const modelId = input[1];
+
+    if (command !== 'list') {
+      if (!modelId) {
+        console.log('Model ID is required');
+        return;
+      }
+    }
+
+    switch (command) {
+      case 'list':
+        this.modelsUsecases.findAll().then(console.log);
+        return;
+      case 'get':
+        this.modelsUsecases.findOne(modelId).then(console.log);
+        return;
+      case 'remove':
+        this.modelsUsecases.remove(modelId).then(console.log);
+        return;
+
+      case 'stop':
+        return this.modelsUsecases
+          .stopModel(modelId)
+          .then(console.log)
+          .catch(console.error);
+
+      case 'stats':
+      case 'fetch':
+      case 'build': {
+        console.log('Command is not supported yet');
+        return;
+      }
+
+      default:
+        console.error(`Command ${command} is not supported`);
+        return;
+    }
+  }
+}
diff --git a/cortex-js/src/infrastructure/commanders/pull.command.ts b/cortex-js/src/infrastructure/commanders/pull.command.ts
@@ -2,6 +2,7 @@ import { ModelsUsecases } from '@/usecases/models/models.usecases';
 import { CommandRunner, SubCommand } from 'nest-commander';
 import { CreateModelDto } from '../dtos/models/create-model.dto';
 import { ModelFormat } from '@/domain/models/model.interface';
+import { Presets, SingleBar } from 'cli-progress';
 
 const AllQuantizations = [
   'Q3_K_S',
@@ -26,30 +27,29 @@ const AllQuantizations = [
   'COPY',
 ];
 
-@SubCommand({ name: 'pull' })
+@SubCommand({ name: 'pull', aliases: ['download'] })
 export class PullCommand extends CommandRunner {
   constructor(private readonly modelsUsecases: ModelsUsecases) {
     super();
   }
 
   async run(input: string[]): Promise<void> {
-    if (input.length < 2) {
+    if (input.length < 1) {
       return Promise.reject('Model ID is required');
     }
 
     const modelId = input[0];
     if (modelId.includes('/')) {
       await this.pullHuggingFaceModel(modelId);
     }
-    this.modelsUsecases.downloadModel({ modelId });
-
-    // const bar = new SingleBar({}, Presets.shades_classic);
-    // bar.start(100, 0);
-    // await this.modelsUsecases.downloadModelProgress({ modelId }, (progress) => {
-    //   bar.update(progress);
-    // });
-    // console.log('\nDownload complete!');
-    // process.exit(0);
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(100, 0);
+    await this.modelsUsecases.downloadModel({ modelId }, (progress) => {
+      bar.update(progress);
+    });
+    console.log('\nDownload complete!');
+    process.exit(0);
   }
 
   async pullHuggingFaceModel(modelId: string) {
diff --git a/cortex-js/src/infrastructure/commanders/serve.command.ts b/cortex-js/src/infrastructure/commanders/serve.command.ts
@@ -1,5 +1,5 @@
 import { spawn } from 'child_process';
-import { defaultCortexJsHost } from 'constant';
+import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
 import { CommandRunner, SubCommand, Option } from 'nest-commander';
 import { join } from 'path';
 
@@ -16,7 +16,7 @@ export class ServeCommand extends CommandRunner {
 
   async run(_input: string[], options?: ServeOptions): Promise<void> {
     const host = options?.host || defaultCortexJsHost;
-    const port = options?.port || defaultCortexJsHost;
+    const port = options?.port || defaultCortexJsPort;
 
     spawn('node', [join(__dirname, '../../main.js')], {
       env: {
diff --git a/cortex-js/src/infrastructure/commanders/start.command.ts b/cortex-js/src/infrastructure/commanders/start.command.ts
@@ -0,0 +1,48 @@
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { LoadModelDto } from '../dtos/models/load-model.dto';
+
+@SubCommand({ name: 'start', aliases: ['run'] })
+export class StartCommand extends CommandRunner {
+  constructor(
+    private readonly modelsUsecases: ModelsUsecases,
+    private readonly cortexUsecases: CortexUsecases,
+  ) {
+    super();
+  }
+
+  async run(input: string[]): Promise<void> {
+    const modelId = input[0];
+
+    if (!modelId) {
+      console.log('Model ID is required');
+      return;
+    }
+    return this.startCortex()
+      .then(() => this.startModel(modelId))
+      .then(console.log)
+      .catch(console.error);
+  }
+
+  private async startCortex() {
+    const host = '127.0.0.1';
+    const port = '3928';
+    return this.cortexUsecases.startCortex(host, port);
+  }
+  private async startModel(modelId: string) {
+    const settings = {
+      cpu_threads: 10,
+      ctx_len: 2048,
+      embedding: false,
+      prompt_template:
+        '{system_message}\n### Instruction: {prompt}\n### Response:',
+      system_prompt: '',
+      user_prompt: '\n### Instruction: ',
+      ai_prompt: '\n### Response:',
+      ngl: 100,
+    };
+    const loadModelDto: LoadModelDto = { modelId, settings };
+    return this.modelsUsecases.startModel(loadModelDto);
+  }
+}
diff --git a/cortex-js/src/infrastructure/controllers/models.controller.ts b/cortex-js/src/infrastructure/controllers/models.controller.ts
@@ -34,7 +34,7 @@ export class ModelsController {
   })
   @Post('load')
   load(@Body() loadModelDto: LoadModelDto) {
-    return this.modelsService.loadModel(loadModelDto);
+    return this.modelsService.startModel(loadModelDto);
   }
 
   @Post('download')
diff --git a/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts b/cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts
@@ -9,21 +9,21 @@ import { ConfigService } from '@nestjs/config';
  * The class provides methods for initializing and stopping a model, and for making inference requests.
  * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
  */
+const LOCAL_HOST = '127.0.0.1';
+const NITRO_DEFAULT_PORT = 3928;
+const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`;
+const LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`;
+const UNLOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/unloadmodel`;
 @Injectable()
 export default class CortexProvider extends OAIEngineExtension {
   provider: string = 'cortex';
-  apiUrl = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion';
+  apiUrl = 'http://127.0.0.1:3928/inferences/server/chat_completion';
 
   constructor(private readonly configService: ConfigService) {
     super();
   }
 
   override async loadModel(model: Model): Promise<void> {
-    const LOCAL_HOST = '127.0.0.1';
-    const NITRO_DEFAULT_PORT = 3928;
-    const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`;
-    const url = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
-
     const modelsContainerDir =
       this.configService.get<string>('CORTEX_MODELS_DIR') ??
       resolve('./models');
@@ -60,13 +60,33 @@ export default class CortexProvider extends OAIEngineExtension {
       nitroModelSettings.ai_prompt = prompt.ai_prompt;
     }
 
-    await fetch(url, {
+    return fetch(LOAD_MODEL_URL, {
       method: 'POST',
       headers: {
         'Content-Type': 'application/json',
       },
       body: JSON.stringify(nitroModelSettings),
-    });
+    })
+      .then((res) => {
+        if (!res.ok) {
+          throw new Error('Failed to load model');
+        }
+      })
+      .catch((e) => {
+        throw e;
+      });
+  }
+
+  override async unloadModel(modelId: string): Promise<void> {
+    return fetch(UNLOAD_MODEL_URL, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: modelId,
+      }),
+    }).then();
   }
 
   private readonly promptTemplateConverter = (
diff --git a/cortex-js/src/usecases/cortex/cortex.usecases.ts b/cortex-js/src/usecases/cortex/cortex.usecases.ts
diff --git a/cortex-js/src/usecases/models/models.usecases.ts b/cortex-js/src/usecases/models/models.usecases.ts

Original file line number	Diff line number	Diff line change
`@@ -4,4 +4,5 @@ export abstract class EngineExtension extends Extension {`
`4`	`4`	`abstract provider: string;`
`5`	`5`	`abstract inference(completion: any, req: any, res: any): void;`
`6`	`6`	`abstract loadModel(loadModel: any): Promise<void>;`
	`7`	`+ abstract unloadModel(modelId: string): Promise<void>;`
`7`	`8`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+/* eslint-disable @typescript-eslint/no-unused-vars */`
`1`	`2`	`import { EngineExtension } from './engine.abstract';`
`2`	`3`
`3`	`4`	`export abstract class OAIEngineExtension extends EngineExtension {`
`@@ -32,6 +33,6 @@ export abstract class OAIEngineExtension extends EngineExtension {`
`32`	`33`	`response.body.pipe(res);`
`33`	`34`	`}`
`34`	`35`
`35`		`- // eslint-disable-next-line @typescript-eslint/no-unused-vars`
`36`	`36`	`async loadModel(loadModel: any): Promise<void> {}`
	`37`	`+ async unloadModel(modelId: string): Promise<void> {}`
`37`	`38`	`}`