Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/build-all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ permissions:
contents: read

on:
push:
branches: ["release/*"]
pull_request:
types: [opened, reopened, closed]
branches: ["release/*"]
release:
types: [published]
Expand All @@ -15,7 +14,7 @@ on:
branch:
description: 'The branch name or tag to run the workflow on'
required: true
default: 'main'
default: 'dev'
type: string

env:
Expand All @@ -27,6 +26,7 @@ jobs:
runs-on: [self-hosted, paicicd]
timeout-minutes: 120
environment: auto-test
if: github.event_name != 'pull_request' || ( github.event.action == 'opened' || github.event.action == 'reopened' || github.event.pull_request.merged == true)
container:
image: ubuntu:latest
volumes:
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/build-deploy-changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ permissions:
contents: read

on:
push:
branches: [main, dev, "release/*"]
pull_request:
branches: [main, dev, "release/*"]

Expand Down
2 changes: 1 addition & 1 deletion contrib/kubespray/script/environment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ echo "Install sshpass"
sudo apt-get -y install sshpass

echo "Install kubespray's requirements and ansible is included"
sudo apt-get -y remove python3-cryptography # avoid conflict with pip
sudo apt-get -y remove python3-cryptography python3-cffi # avoid conflict with pip
sudo python3 -m pip install -r ${HOME}/pai-deploy/kubespray/requirements.txt

# workaround python3-apt issue
Expand Down
1 change: 1 addition & 0 deletions contrib/kubespray/script/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ jinja2==3.1.4
pyOpenSSL==24.1.0
requests==2.32.3
oauthlib==3.3.1
cffi==2.0.0
39 changes: 39 additions & 0 deletions docs/LuciaTrainingPlatform/blog/2026-02-06-release-1-5.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
slug: release-ltp-v1.5
title: Releasing Lucia Training Platform v1.5
author: Lucia Training Platform Team
tags: [ltp, announcement, release]
---

We are pleased to announce the official release of **Lucia Training Platform v1.5.0**!

## Lucia Training Platform v1.5.0 Release Notes

This release focuses on platform stability improvements, deployment enhancements, hardware compatibility, and security updates.

## Platform Features & Stability
- Added the ability to track a user's historical jobs in virtual clusters that no longer exist, including job status and logs
- Support for assigning custom job names
- Updated the workflow trigger conditions so that the workflows run only when a pull request targeting the `main`, `dev`, or `release/*` branches is closed

## Deployment & Hardware Support
- Fixed kubespray deployment on bare metal when the cffi package is already installed by the system package manager
- Fixed NUMA parsing in job exporter on GB200 hardware
- Fixed job exporter compatibility issue on ARM nodes
- Enhanced multi-architecture support across platform components

## Security
- Updated Kubernetes scheduler version to 1.33.1
- Updated Go version to 1.24.9 for framework controller, hivedscheduler, and watchdog
- Updated Node.js packages for alert-handler, job-status-change-notification, rest-server
- Updated Python packages for cluster local storage, copilot-chat, and dashboard-data-backup
- Updated RPM packages for database-controller
- Updated Docker version for webportal-dind
- Fixed security issues in multiple component dependencies
- Fixed the missing "logger" module error when running newer versions of DCGM

## Storage & Infrastructure
- Mounted SSH key pairs for cluster local storage
- Enhanced cluster local storage security and stability
- Fixed Docker pull problem after Docker version update

Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def get_core_numa_mapping(core_count):
numa_mapping = {}
lines = output.split('\n')
for line in lines:
if 'node ' in line and 'cpus' in line:
if 'node ' in line and 'cpus' in line and not line.strip().endswith('cpus:'):
current_numa_domain = int(re.search(r'node (\d+)', line).group(1))
if ':' in line:
cpus_str = line.split(': ')[1].split()
Expand Down
3 changes: 2 additions & 1 deletion src/rest-server/src/models/v2/job/k8s.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ const runtimeEnv = require('./runtime-env');
const launcherConfig = require('@pai/config/launcher');
const createError = require('@pai/utils/error');
const protocolSecret = require('@pai/utils/protocolSecret');
const userModel = require('@pai/models/v2/user');
const tokenModel = require('@pai/models/token');
const storageModel = require('@pai/models/v2/storage');
const logger = require('@pai/config/logger');
Expand Down Expand Up @@ -1135,6 +1134,8 @@ const get = async (frameworkName, jobAttemptId) => {
};

const put = async (frameworkName, config, rawConfig) => {
// Lazy load to avoid circular dependency
const userModel = require('@pai/models/v2/user');
const [userName] = frameworkName.split(/~(.+)/);

const virtualCluster =
Expand Down
7 changes: 6 additions & 1 deletion src/rest-server/src/models/v2/storage.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
// module dependencies
const status = require('statuses');
const createError = require('@pai/utils/error');
const user = require('@pai/models/v2/user');
const secret = require('@pai/models/kubernetes/k8s-secret');
const kubernetes = require('@pai/models/kubernetes/kubernetes');
const logger = require('@pai/config/logger');
Expand Down Expand Up @@ -178,6 +177,9 @@ const convertVolumeDetail = async (pvc) => {
};

const list = async (userName, filterDefault = false) => {
// Lazy require to avoid circular dependency
const user = require('@pai/models/v2/user');

let response;
if (pvcCache.has('storageList')) {
logger.info('Read persistant volume claim list from cache');
Expand Down Expand Up @@ -247,6 +249,9 @@ const list = async (userName, filterDefault = false) => {
};

const get = async (storageName, userName) => {
// Lazy require to avoid circular dependency
const user = require('@pai/models/v2/user');

let response;
if (pvcCache.has(storageName)) {
logger.info(`Read persistant volume claim from cache: ${storageName}`);
Expand Down
4 changes: 3 additions & 1 deletion src/rest-server/src/utils/manager/user/crudK8sSecret.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ const logger = require('@pai/config/logger');
const groupModel = require('@pai/models/v2/group');
const k8sModel = require('@pai/models/kubernetes/kubernetes');
const { Mutex } = require('async-mutex');
const { job } = require('@pai/models/v2/job');

const USER_NAMESPACE = process.env.PAI_USER_NAMESPACE || 'pai-user-v2';

Expand All @@ -44,6 +43,9 @@ const cache = new Map();
const readMutex = new Mutex();

async function getHistoryVCs(name, grouplist, retrieveFromHistory=true) {
// Lazy require to avoid circular dependency
const { job } = require('@pai/models/v2/job');

// Retrieve VC list from the user's job history
let vcsFromJob = [];
if (retrieveFromHistory) {
Expand Down
Loading