Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 20ecd54

Browse files
fix: gate onnx and tensorrt-llm run on darwin (#837)
Co-authored-by: marknguyen1302 <nguyenvu1302.work@gmail.com>
1 parent b2c5a1b commit 20ecd54

File tree

2 files changed

+19
-11
lines changed

2 files changed

+19
-11
lines changed

cortex-js/src/infrastructure/commanders/shortcuts/run.command.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,14 @@ export class RunCommand extends CommandRunner {
5454
exit(1);
5555
}
5656
}
57+
58+
// Check model compatibility on this machine
59+
await checkModelCompatibility(modelId, checkingSpinner);
60+
5761
// If not exist
5862
// Try Pull
5963
if (!(await this.modelsCliUsecases.getModel(modelId))) {
60-
checkingSpinner.succeed('Model not found. Attempting to pull...');
64+
checkingSpinner.succeed();
6165
await this.modelsCliUsecases.pullModel(modelId).catch((e: Error) => {
6266
if (e instanceof ModelNotFoundException)
6367
checkingSpinner.fail('Model does not exist.');
@@ -73,16 +77,11 @@ export class RunCommand extends CommandRunner {
7377
!Array.isArray(existingModel.files) ||
7478
/^(http|https):\/\/[^/]+\/.*/.test(existingModel.files[0])
7579
) {
76-
checkingSpinner.fail(
77-
`Model is not available`
78-
);
80+
checkingSpinner.fail(`Model is not available`);
7981
process.exit(1);
8082
}
8183
checkingSpinner.succeed('Model found');
8284

83-
// Check model compatibility on this machine
84-
await checkModelCompatibility(modelId);
85-
8685
const engine = existingModel.engine || Engines.llamaCPP;
8786
// Pull engine if not exist
8887
if (

cortex-js/src/utils/model-check.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
11
import { MIN_CUDA_VERSION } from "@/infrastructure/constants/cortex";
22
import { getCudaVersion } from "./cuda";
3+
import ora from "ora";
34

4-
export const checkModelCompatibility = async (modelId: string) => {
5+
export const checkModelCompatibility = async (modelId: string, spinner?: ora.Ora) => {
6+
function log(message: string) {
7+
if (spinner) {
8+
spinner.fail(message);
9+
} else {
10+
console.error(message);
11+
}
12+
}
513
if (modelId.includes('onnx') && process.platform !== 'win32') {
6-
console.error('The ONNX engine does not support this OS yet.');
14+
log('The ONNX engine does not support this OS yet.');
715
process.exit(1);
816
}
917

1018
if (modelId.includes('tensorrt-llm') ) {
1119
if(process.platform === 'darwin'){
12-
console.error('Tensorrt-LLM models are not supported on this OS');
20+
log('Tensorrt-LLM models are not supported on this OS');
1321
process.exit(1);
1422
}
1523

@@ -19,11 +27,12 @@ export const checkModelCompatibility = async (modelId: string) => {
1927
const [requiredMajor, requiredMinor] = MIN_CUDA_VERSION.split('.').map(Number);
2028
const isMatchRequired = currentMajor > requiredMajor || (currentMajor === requiredMajor && currentMinor >= requiredMinor);
2129
if (!isMatchRequired) {
22-
console.error(`CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`);
30+
log(`CUDA version ${version} is not compatible with TensorRT-LLM models. Required version: ${MIN_CUDA_VERSION}`)
2331
process.exit(1);
2432
}
2533
} catch (e) {
2634
console.error(e.message ?? e);
35+
log(e.message ?? e);
2736
process.exit(1);
2837
}
2938

0 commit comments

Comments (0)