diff --git a/.github/workflows/train-model.yml b/.github/workflows/train-model.yml index a2b3137b..9c47a1dc 100644 --- a/.github/workflows/train-model.yml +++ b/.github/workflows/train-model.yml @@ -8,7 +8,7 @@ on: examples: description: "Number of AI-synthesized examples to generate (total after merge)" type: number - default: 150 + default: 50 required: false commit_model: description: "Commit the trained model back to the repository" @@ -67,8 +67,8 @@ jobs: # Verify it's running curl -s http://localhost:11434/api/tags && echo "Ollama server is up" - - name: Pull qwen3.5:9b model - run: ollama pull qwen3.5:9b + - name: Pull llama3.2:1b model + run: ollama pull llama3.2:1b # ── Build training tool ───────────────────────────────────────── - name: Build training tool @@ -84,7 +84,8 @@ jobs: dotnet run --project tools/JD.AI.Workflows.Training/JD.AI.Workflows.Training.csproj \ --no-build --configuration Release -- \ - --ollama-generate ${{ github.event.inputs.examples || 150 }} \ + --ollama-generate ${{ github.event.inputs.examples || 50 }} \ + --model llama3.2:1b \ --output src/JD.AI.Workflows/Models/intent_classifier.zip echo "examples=$(cat src/JD.AI.Workflows/Models/ollama_training_data.jsonl | wc -l)" >> $GITHUB_OUTPUT @@ -117,6 +118,7 @@ jobs: dotnet run --project tools/JD.AI.Workflows.Training/JD.AI.Workflows.Training.csproj \ --no-build --configuration Release -- \ --ollama-validate \ + --model llama3.2:1b \ --data src/JD.AI.Workflows/Models/ollama_training_data.jsonl # Fail if more than 15% disagreement with Ollama @@ -147,7 +149,7 @@ jobs: else git commit -m "ci: retrain intent classifier model - Generated ${{ steps.generate.outputs.examples }} examples via qwen3.5:9b + Generated ${{ steps.generate.outputs.examples }} examples via llama3.2:1b Triggered by: ${{ github.event_name }} Workflow run: ${{ github.run_id }}" echo "no_changes=false" >> $GITHUB_OUTPUT diff --git a/src/JD.AI.Workflows/Models/intent_classifier.zip b/src/JD.AI.Workflows/Models/intent_classifier.zip index 29443b57..cd68b3b7 100644 Binary files a/src/JD.AI.Workflows/Models/intent_classifier.zip and b/src/JD.AI.Workflows/Models/intent_classifier.zip differ diff --git a/src/JD.AI.Workflows/Models/ollama_training_data.jsonl b/src/JD.AI.Workflows/Models/ollama_training_data.jsonl new file mode 100644 index 00000000..eeb17692 --- /dev/null +++ b/src/JD.AI.Workflows/Models/ollama_training_data.jsonl @@ -0,0 +1,50 @@ +{"Prompt":"Write integration tests for the monitoring API","IsWorkflow":false} +{"Prompt":"Archive old debugging logs to S3","IsWorkflow":false} +{"Prompt":"Archive old npm logs to S3","IsWorkflow":true} +{"Prompt":"What would make python more maintainable?","IsWorkflow":false} +{"Prompt":"Create a worktree for the python feature branch","IsWorkflow":false} +{"Prompt":"Deploy database to the staging environment","IsWorkflow":true} +{"Prompt":"Scale the git service to 3 replicas","IsWorkflow":false} +{"Prompt":"Background: cloud has been causing issues since last week","IsWorkflow":true} +{"Prompt":"Does cloud support WebSocket connections?","IsWorkflow":false} +{"Prompt":"Add fuzzing tests for the database input handler","IsWorkflow":false} +{"Prompt":"Seed the cloud database with test fixtures","IsWorkflow":true} +{"Prompt":"How do I configure ci-cd authentication?","IsWorkflow":false} +{"Prompt":"Replace all TODO comments in the file-ops codebase with proper issues","IsWorkflow":false} +{"Prompt":"What do you think about networking for this use case?","IsWorkflow":false} +{"Prompt":"Write a Dockerfile for the monitoring service","IsWorkflow":false} +{"Prompt":"How can we improve the api developer experience?","IsWorkflow":false} +{"Prompt":"What\u0027s new in the devops release notes?","IsWorkflow":false} +{"Prompt":"Add a required reviewer to the shell PR pipeline","IsWorkflow":false} +{"Prompt":"Write a Dockerfile for the database service","IsWorkflow":true} +{"Prompt":"Background: docs has been causing issues since last week","IsWorkflow":false} +{"Prompt":"Revert the last commit that broke shell","IsWorkflow":false} +{"Prompt":"Tag the release v1.14.0 for npm","IsWorkflow":true} +{"Prompt":"What\u0027s new in the git release notes?","IsWorkflow":false} +{"Prompt":"How would you design code-review from scratch?","IsWorkflow":false} +{"Prompt":"What are the known limitations of monitoring?","IsWorkflow":false} +{"Prompt":"What do you think about docker-compose for this use case?","IsWorkflow":false} +{"Prompt":"Does devops support WebSocket connections?","IsWorkflow":true} +{"Prompt":"Update the CHANGELOG for git","IsWorkflow":false} +{"Prompt":"Background: database has been causing issues since last week","IsWorkflow":false} +{"Prompt":"Create a PR that adds telemetry and add reviewers","IsWorkflow":false} +{"Prompt":"Remove the dead code from the devops module","IsWorkflow":false} +{"Prompt":"Should we use git or stick with the current approach?","IsWorkflow":false} +{"Prompt":"Squash the last 5 commits and force push","IsWorkflow":false} +{"Prompt":"Update the CHANGELOG for git","IsWorkflow":false} +{"Prompt":"Replace all TODO comments in the networking codebase with proper issues","IsWorkflow":false} +{"Prompt":"Write integration tests for the shell API","IsWorkflow":false} +{"Prompt":"Does debugging support WebSocket connections?","IsWorkflow":false} +{"Prompt":"What patterns work best for docker-compose at scale?","IsWorkflow":false} +{"Prompt":"Fix the formatting in all database source files","IsWorkflow":true} +{"Prompt":"Tag the release v1.1.0 for file-ops","IsWorkflow":false} +{"Prompt":"Run a migration to add index on docs.events","IsWorkflow":false} +{"Prompt":"Build and push the testing image to the registry","IsWorkflow":true} +{"Prompt":"Write integration tests for the testing API","IsWorkflow":false} +{"Prompt":"Audit the npm packages in cloud for vulnerabilities","IsWorkflow":false} +{"Prompt":"Background: networking has been causing issues since last week","IsWorkflow":false} +{"Prompt":"Prune all stopped containers and dangling images","IsWorkflow":false} +{"Prompt":"Does code-review support WebSocket connections?","IsWorkflow":true} +{"Prompt":"Check the health of devops in production","IsWorkflow":false} +{"Prompt":"Dump the database production DB to a file","IsWorkflow":false} +{"Prompt":"Should we use git or stick with the current approach?","IsWorkflow":false} diff --git a/tools/JD.AI.Workflows.Training/OllamaClient.cs b/tools/JD.AI.Workflows.Training/OllamaClient.cs index cc825144..e07862d9 100644 --- a/tools/JD.AI.Workflows.Training/OllamaClient.cs +++ b/tools/JD.AI.Workflows.Training/OllamaClient.cs @@ -16,7 +16,7 @@ public sealed class OllamaClient : IDisposable public OllamaClient(string model, string baseUrl = "http://localhost:11434") { _model = model; - _http = new HttpClient { BaseAddress = new Uri(baseUrl), Timeout = TimeSpan.FromMinutes(5) }; + _http = new HttpClient { BaseAddress = new Uri(baseUrl), Timeout = TimeSpan.FromMinutes(10) }; } /// diff --git a/tools/JD.AI.Workflows.Training/Program.cs b/tools/JD.AI.Workflows.Training/Program.cs index 30d47d82..d4d3b461 100644 --- a/tools/JD.AI.Workflows.Training/Program.cs +++ b/tools/JD.AI.Workflows.Training/Program.cs @@ -49,6 +49,10 @@ public static async Task Main(string[] args) ? int.Parse(args[ollamaGenerateArgIdx + 1], System.Globalization.CultureInfo.InvariantCulture) : null; var ollamaValidate = args.Contains("--ollama-validate"); + var ollamaModelArgIdx = Array.IndexOf(args, "--model"); + var ollamaModel = ollamaModelArgIdx >= 0 && ollamaModelArgIdx + 1 < args.Length + ? args[ollamaModelArgIdx + 1] + : "qwen3.5:9b"; if (benchmark) { @@ -69,7 +73,7 @@ public static async Task Main(string[] args) Directory.CreateDirectory(dir); List generated = []; - using (var synthesizer = new AiTrainingDataSynthesizer(ollamaHost: ollamaHost)) + using (var synthesizer = new AiTrainingDataSynthesizer(model: ollamaModel, ollamaHost: ollamaHost)) { await AnsiConsole.Progress() .StartAsync(async ctx => @@ -106,7 +110,7 @@ await AnsiConsole.Progress() var prompts = TrainingDataGenerator.ReadCsv(dataPath); List discrepancies = []; - using (var synthesizer = new AiTrainingDataSynthesizer(ollamaHost: ollamaHost)) + using (var synthesizer = new AiTrainingDataSynthesizer(model: ollamaModel, ollamaHost: ollamaHost)) { await AnsiConsole.Progress() .StartAsync(async ctx =>