
Commit 0cbbf08
feat: cortex benchmark command (#684)
1 parent c717df2

13 files changed, 372 insertions(+), 11 deletions(-)

cortex-js/package.json
Lines changed: 2 additions & 0 deletions

@@ -48,11 +48,13 @@
     "decompress": "^4.2.1",
     "js-yaml": "^4.1.0",
     "nest-commander": "^3.13.0",
+    "openai": "^4.50.0",
     "readline": "^1.3.0",
     "reflect-metadata": "^0.2.0",
     "rxjs": "^7.8.1",
     "sqlite": "^5.1.1",
     "sqlite3": "^5.1.7",
+    "systeminformation": "^5.22.10",
     "typeorm": "^0.3.20",
     "ulid": "^2.3.0",
     "update-notifier": "^5.0.0",

cortex-js/src/command.module.ts
Lines changed: 2 additions & 0 deletions

@@ -28,6 +28,7 @@ import { PSCommand } from './infrastructure/commanders/ps.command';
 import { KillCommand } from './infrastructure/commanders/kill.command';
 import { PresetCommand } from './infrastructure/commanders/presets.command';
 import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command';
+import { BenchmarkCommand } from './infrastructure/commanders/benchmark.command';

 @Module({
   imports: [
@@ -56,6 +57,7 @@ import { EmbeddingCommand } from './infrastructure/commanders/embeddings.command
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,

     // Questions
     InitRunModeQuestions,

cortex-js/src/file-manager/file-manager.service.ts
Lines changed: 11 additions & 0 deletions

@@ -12,6 +12,7 @@ export class FileManagerService {
   private modelFolderName = 'models';
   private presetFolderName = 'presets';
   private extensionFoldername = 'extensions';
+  private benchmarkFoldername = 'benchmark';
   private cortexCppFolderName = 'cortex-cpp';

   /**
@@ -116,4 +117,14 @@
     const dataFolderPath = await this.getDataFolderPath();
     return join(dataFolderPath, this.extensionFoldername);
   }
+
+  /**
+   * Get the benchmark folder path
+   * Usually it is located at the home directory > cortex > benchmark
+   * @returns the path to the benchmark folder
+   */
+  async getBenchmarkPath(): Promise<string> {
+    const dataFolderPath = await this.getDataFolderPath();
+    return join(dataFolderPath, this.benchmarkFoldername);
+  }
 }
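
Benchmark artifacts thus live alongside the other Cortex data folders. A sketch of the resolution, assuming the data folder is ~/cortex (the actual location depends on user settings):

// Inside an async method with an injected FileManagerService:
const benchmarkFolder = await fileManagerService.getBenchmarkPath(); // e.g. ~/cortex/benchmark
const configPath = join(benchmarkFolder, 'config.yaml'); // read (or created) by the CLI
const outputPath = join(benchmarkFolder, 'output.json'); // written after a run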
cortex-js/src/infrastructure/commanders/benchmark.command.ts
Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+import { CommandRunner, SubCommand } from 'nest-commander';
+import { BenchmarkCliUsecases } from './usecases/benchmark.cli.usecases';
+
+@SubCommand({
+  name: 'benchmark',
+  subCommands: [],
+  description:
+    'Benchmark and analyze the performance of a specific AI model using a variety of system resources',
+})
+export class BenchmarkCommand extends CommandRunner {
+  constructor(private readonly benchmarkUsecases: BenchmarkCliUsecases) {
+    super();
+  }
+
+  async run(): Promise<void> {
+    return this.benchmarkUsecases.benchmark();
+  }
+}
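
The command is deliberately thin: nest-commander resolves it for `cortex benchmark`, and run() only delegates to the usecase. A smoke-test sketch of that contract, using a stand-in usecase that is not part of this commit:

import { BenchmarkCommand } from './benchmark.command';

// Fake usecase: only the benchmark() method matters to the command.
const fakeUsecases = {
  benchmark: async () => console.log('benchmark flow invoked'),
};

// run() forwards straight to BenchmarkCliUsecases.benchmark().
new BenchmarkCommand(fakeUsecases as any).run().then(() => console.log('done'));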

cortex-js/src/infrastructure/commanders/chat.command.ts
Lines changed: 2 additions & 1 deletion

@@ -6,8 +6,9 @@ import {
 } from 'nest-commander';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { exit } from 'node:process';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
+import { ModelStat } from './types/model-stat.interface';

 type ChatOptions = {
   threadId?: string;

cortex-js/src/infrastructure/commanders/cortex-command.commander.ts
Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,7 @@ import { KillCommand } from './kill.command';
 import pkg from '@/../package.json';
 import { PresetCommand } from './presets.command';
 import { EmbeddingCommand } from './embeddings.command';
+import { BenchmarkCommand } from './benchmark.command';

 interface CortexCommandOptions {
   version: boolean;
@@ -26,6 +27,7 @@ interface CortexCommandOptions {
     KillCommand,
     PresetCommand,
     EmbeddingCommand,
+    BenchmarkCommand,
   ],
   description: 'Cortex CLI',
 })

cortex-js/src/infrastructure/commanders/embeddings.command.ts
Lines changed: 2 additions & 1 deletion

@@ -5,9 +5,10 @@ import {
   SubCommand,
 } from 'nest-commander';
 import { ModelsUsecases } from '@/usecases/models/models.usecases';
-import { ModelStat, PSCliUsecases } from './usecases/ps.cli.usecases';
+import { PSCliUsecases } from './usecases/ps.cli.usecases';
 import { ChatCliUsecases } from './usecases/chat.cli.usecases';
 import { inspect } from 'util';
+import { ModelStat } from './types/model-stat.interface';

 interface EmbeddingCommandOptions {
   encoding_format?: string;
cortex-js/src/infrastructure/commanders/types/benchmark-config.interface.ts
Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+import { ChatCompletionMessageParam } from 'openai/resources';
+
+export interface BenchmarkConfig {
+  api: {
+    base_url: string;
+    api_key: string;
+    parameters: {
+      messages: ChatCompletionMessageParam[];
+      model: string;
+      stream?: boolean;
+      max_tokens?: number;
+      stop?: string[];
+      frequency_penalty?: number;
+      presence_penalty?: number;
+      temperature?: number;
+      top_p?: number;
+    };
+  };
+  prompts?: {
+    min: number;
+    max: number;
+    samples: number;
+  };
+  output: string;
+  concurrency: number;
+  num_rounds: number;
+  hardware: string[];
+}
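
For orientation, a value satisfying this interface might look like the sketch below. All values are illustrative; the commit's actual defaults live in defaultBenchmarkConfiguration under @/infrastructure/constants/benchmark, which is not part of this diff:

import { BenchmarkConfig } from './benchmark-config.interface';

const exampleConfig: BenchmarkConfig = {
  api: {
    base_url: 'http://localhost:1337/v1', // illustrative endpoint
    api_key: '',
    parameters: {
      messages: [{ role: 'user', content: 'Write a short story.' }],
      model: 'tinyllama', // illustrative model id
      stream: true,
      max_tokens: 512,
    },
  },
  prompts: { min: 1024, max: 2048, samples: 10 },
  output: 'json',
  concurrency: 1,
  num_rounds: 10,
  hardware: ['cpu', 'gpu', 'ram'],
};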
cortex-js/src/infrastructure/commanders/types/model-stat.interface.ts
Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+export interface ModelStat {
+  modelId: string;
+  engine?: string;
+  duration?: string;
+  status: string;
+  vram?: string;
+  ram?: string;
+}
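
The optional string fields hold preformatted display values rather than raw numbers; a plausible instance (all values illustrative):

import { ModelStat } from './model-stat.interface';

const stat: ModelStat = {
  modelId: 'tinyllama',
  engine: 'cortex.llamacpp', // hypothetical engine name
  status: 'running',
  duration: '2m 30s',
  ram: '512 MB',
  vram: '1.2 GB',
};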
cortex-js/src/infrastructure/commanders/usecases/benchmark.cli.usecases.ts
Lines changed: 250 additions & 0 deletions

@@ -0,0 +1,250 @@
+import { Injectable } from '@nestjs/common';
+import si from 'systeminformation';
+import fs, { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import OpenAI from 'openai';
+import { Presets, SingleBar } from 'cli-progress';
+import yaml from 'js-yaml';
+import { FileManagerService } from '@/file-manager/file-manager.service';
+import { join } from 'path';
+import { ModelsCliUsecases } from './models.cli.usecases';
+import { spawn } from 'child_process';
+import { BenchmarkConfig } from '../types/benchmark-config.interface';
+import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
+import { inspect } from 'util';
+import { defaultBenchmarkConfiguration } from '@/infrastructure/constants/benchmark';
+
+@Injectable()
+export class BenchmarkCliUsecases {
+  constructor(
+    private readonly modelsCliUsecases: ModelsCliUsecases,
+    private readonly cortexUsecases: CortexUsecases,
+    private readonly fileService: FileManagerService,
+  ) {}
+
+  config: BenchmarkConfig;
+  openai?: OpenAI;
+
+  /**
+   * Benchmark and analyze the performance of a specific AI model using a variety of system resources
+   */
+  async benchmark() {
+    return this.getBenchmarkConfig().then((config) => {
+      this.config = config;
+
+      // TODO: Using OpenAI client or Cortex client to benchmark?
+      this.openai = new OpenAI({
+        apiKey: this.config.api.api_key,
+        baseURL: this.config.api.base_url,
+        timeout: 20 * 1000,
+      });
+
+      // Start the API server, then load the model before running rounds.
+      spawn('cortex', ['serve'], {
+        detached: false,
+      });
+
+      return this.cortexUsecases
+        .startCortex()
+        .then(() =>
+          this.modelsCliUsecases.startModel(this.config.api.parameters.model),
+        )
+        .then(() => this.runBenchmarks())
+        .then(() => process.exit(0));
+    });
+  }
+  /**
+   * Get the benchmark configuration: load config.yaml from the benchmark
+   * folder if present, otherwise write the defaults there and use them.
+   * @returns the benchmark configuration
+   */
+  private async getBenchmarkConfig() {
+    const benchmarkFolder = await this.fileService.getBenchmarkPath();
+    const configurationPath = join(benchmarkFolder, 'config.yaml');
+    if (existsSync(configurationPath)) {
+      return yaml.load(
+        readFileSync(configurationPath, 'utf8'),
+      ) as BenchmarkConfig;
+    } else {
+      const config = yaml.dump(defaultBenchmarkConfiguration);
+      if (!existsSync(benchmarkFolder)) {
+        mkdirSync(benchmarkFolder, {
+          recursive: true,
+        });
+      }
+      // writeFileSync is synchronous, so there is nothing to await here.
+      writeFileSync(configurationPath, config, 'utf8');
+      return defaultBenchmarkConfiguration;
+    }
+  }
+  /**
+   * Get the system resources for benchmarking
+   * using the systeminformation library
+   * @returns the system resources
+   */
+  private async getSystemResources() {
+    return {
+      cpu: await si.currentLoad(),
+      mem: await si.mem(),
+      gpu: (await si.graphics()).controllers,
+    };
+  }
+
+  /**
+   * Get the resource change between two data points
+   * @param startData the start data point
+   * @param endData the end data point
+   * @returns the resource change
+   */
+  private async getResourceChange(startData: any, endData: any) {
+    return {
+      // Relative CPU load change, as a percentage of the starting load.
+      // Note: systeminformation v5 exposes `currentLoad` (camelCase).
+      cpu:
+        startData.cpu && endData.cpu
+          ? ((endData.cpu.currentLoad - startData.cpu.currentLoad) /
+              startData.cpu.currentLoad) *
+            100
+          : null,
+      // Memory delta, as a percentage of total memory.
+      mem:
+        startData.mem && endData.mem
+          ? ((endData.mem.used - startData.mem.used) / startData.mem.total) *
+            100
+          : null,
+    };
+  }
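
For concreteness, with illustrative numbers: CPU load rising from 20% to 25% yields cpu = ((25 - 20) / 20) * 100 = 25, a relative change, while used memory rising from 8 GB to 9 GB out of 16 GB total yields mem = ((9 - 8) / 16) * 100 = 6.25, a share of total memory. The two deltas are on different scales and should not be compared directly.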
+  /**
+   * Run a single simulated user request against the OpenAI-compatible API
+   * and measure latency, time to first token, and token throughput.
+   * @returns the request metrics, or null if the call failed
+   */
+  private async benchmarkUser() {
+    const startResources = await this.getSystemResources();
+    const start = Date.now();
+    let tokenCount = 0;
+    let firstTokenTime = null;
+
+    try {
+      const stream = await this.openai!.chat.completions.create({
+        model: this.config.api.parameters.model,
+        messages: this.config.api.parameters.messages,
+        max_tokens: this.config.api.parameters.max_tokens,
+        stream: true,
+      });
+
+      for await (const chunk of stream) {
+        const content = chunk.choices[0]?.delta?.content;
+        if (!firstTokenTime && content) {
+          firstTokenTime = Date.now();
+        }
+        // Approximate tokens as whitespace-separated words; skip empty
+        // chunks so they do not inflate the count.
+        if (content) {
+          tokenCount += content.split(/\s+/).length;
+        }
+      }
+    } catch (error) {
+      console.error('Error during API call:', error);
+      return null;
+    }
+
+    const latency = Date.now() - start;
+    const ttft = firstTokenTime ? firstTokenTime - start : null;
+    const endResources = await this.getSystemResources();
+    const resourceChange = await this.getResourceChange(
+      startResources,
+      endResources,
+    );
+
+    return {
+      tokens: this.config.api.parameters.max_tokens,
+      token_length: tokenCount, // dynamically calculated (approximate) token count
+      latency, // total request time in ms
+      resourceChange,
+      tpot: tokenCount ? latency / tokenCount : 0, // time per output token (ms)
+      throughput: tokenCount / (latency / 1000), // tokens per second
+      ttft, // time to first token (ms)
+    };
+  }
+  /**
+   * Calculate a percentile over the data using linear interpolation
+   * between closest ranks.
+   * @param data the data to calculate percentiles for
+   * @param percentile the percentile to calculate (0-100)
+   * @returns the percentile value, or null if there is no data
+   */
+  private calculatePercentiles(data: number[], percentile: number) {
+    // Drop failed samples (nulls) before ranking, then check for emptiness.
+    const sorted = data
+      .filter((x: number) => x !== null)
+      .sort((a: number, b: number) => a - b);
+    if (sorted.length === 0) return null;
+    const pos = (percentile / 100) * sorted.length;
+    if (pos < 1) return sorted[0];
+    if (pos >= sorted.length) return sorted[sorted.length - 1];
+    const lower = sorted[Math.floor(pos) - 1];
+    const upper = sorted[Math.ceil(pos) - 1];
+    return lower + (upper - lower) * (pos - Math.floor(pos));
+  }
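
A quick worked check of the interpolation, assuming an instance `usecases` of BenchmarkCliUsecases and reaching into the private method purely for illustration:

// [100, 200, 300, 400], p50: pos = 2, lower = upper = sorted[1] = 200 -> 200.
// p95: pos = 3.8, lower = sorted[2] = 300, upper = sorted[3] = 400,
// so 300 + (400 - 300) * 0.8 = 380.
const latencies = [100, 200, 300, 400];
console.log((usecases as any)['calculatePercentiles'](latencies, 50)); // 200
console.log((usecases as any)['calculatePercentiles'](latencies, 95)); // 380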
+  /**
+   * Run the benchmarks
+   */
+  private async runBenchmarks() {
+    const allResults: any[] = [];
+    const rounds = this.config.num_rounds || 1;
+
+    const bar = new SingleBar({}, Presets.shades_classic);
+    bar.start(rounds, 0);
+
+    for (let i = 0; i < rounds; i++) {
+      const roundResults = [];
+      const hardwareBefore = await this.getSystemResources();
+
+      // Note: the `concurrency` requests in a round currently run
+      // sequentially, since each call is awaited in turn.
+      for (let j = 0; j < this.config.concurrency; j++) {
+        const result = await this.benchmarkUser();
+        if (result) {
+          roundResults.push(result);
+        }
+      }
+
+      const hardwareAfter = await this.getSystemResources();
+      const hardwareChanges = await this.getResourceChange(
+        hardwareBefore,
+        hardwareAfter,
+      );
+
+      allResults.push({
+        round: i + 1,
+        results: roundResults,
+        hardwareChanges,
+      });
+
+      bar.update(i + 1);
+    }
+
+    // Aggregate p50/p75/p95 for each metric across all rounds.
+    const metrics: any = {
+      p50: {},
+      p75: {},
+      p95: {},
+    };
+    const keys = ['latency', 'tpot', 'throughput', 'ttft'];
+    keys.forEach((key) => {
+      const data = allResults.flatMap((r) =>
+        r.results.map((res: object) => res[key as keyof typeof res]),
+      );
+      metrics.p50[key] = this.calculatePercentiles(data, 50);
+      metrics.p75[key] = this.calculatePercentiles(data, 75);
+      metrics.p95[key] = this.calculatePercentiles(data, 95);
+    });
+
+    const output = {
+      hardware: await this.getSystemResources(),
+      results: allResults,
+      metrics,
+    };
+    bar.stop();
+
+    const outputFilePath = join(
+      await this.fileService.getBenchmarkPath(),
+      'output.json',
+    );
+    fs.writeFileSync(outputFilePath, JSON.stringify(output, null, 2));
+    console.log(`Benchmark results and metrics saved to ${outputFilePath}`);
+
+    console.log(
+      inspect(output, { showHidden: false, depth: null, colors: true }),
+    );
+  }
+}
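
Downstream tooling can read the report straight from disk. A sketch, assuming the default data folder layout (the path is illustrative; in-app code should resolve it via FileManagerService.getBenchmarkPath()):

import { readFileSync } from 'fs';
import { join } from 'path';
import { homedir } from 'os';

// Illustrative path to the report written by runBenchmarks().
const outputPath = join(homedir(), 'cortex', 'benchmark', 'output.json');
const report = JSON.parse(readFileSync(outputPath, 'utf8'));

console.log('p95 latency (ms):', report.metrics.p95.latency);
console.log('p50 throughput (tokens/s):', report.metrics.p50.throughput);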
