diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d6f5560f..d4492d0f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,7 +186,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} CLOUDOS_URL: "https://cloudos.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_gitlab --workflow-url https://gitlab.com/lifebit-ai/spammer-nf --repository-platform gitlab import_github: @@ -210,7 +209,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} CLOUDOS_URL: "https://cloudos.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_github --workflow-url https://github.com/lifebit-ai/spammer-nf --repository-platform github job_run_and_status: @@ -308,7 +306,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} CLOUDOS_URL: "https://cloudos.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos job ${{ matrix.feature }} --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --job-id ${{ needs.job_run_and_status.outputs.job_id }} workflow_list: @@ -900,3 +897,51 @@ jobs: --instance-type m4.xlarge \ --params-file "$PARAMS_FILE" \ --wait-completion + interactive_session_list: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: 
Install dependencies + run: | + pip install -e . + - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} + CLOUDOS_URL: "https://cloudos.lifebit.ai" + run: | + echo q |cloudos interactive-session list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID + interactive_session_create: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . + - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} + PROJECT_NAME: "cloudos-cli-tests" + CLOUDOS_URL: "https://cloudos.lifebit.ai" + run: | + cloudos interactive-session create --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --project-name "$PROJECT_NAME" --name ci_test_cli --session-type jupyter --shutdown-in 10m + diff --git a/.github/workflows/ci_az.yml b/.github/workflows/ci_az.yml index 52aec7d3..3488865a 100644 --- a/.github/workflows/ci_az.yml +++ b/.github/workflows/ci_az.yml @@ -167,7 +167,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_AZURE }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_AZURE }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_gitlab --workflow-url https://gitlab.com/lifebit-ai/spammer-nf --repository-platform gitlab import_github_az: @@ -191,7 +190,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_AZURE }} CLOUDOS_WORKSPACE_ID: ${{ 
secrets.CLOUDOS_WORKSPACE_ID_AZURE }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_github --workflow-url https://github.com/lifebit-ai/spammer-nf --repository-platform github job_run_and_status_az: @@ -289,7 +287,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_AZURE }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_AZURE }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos job ${{ matrix.feature }} --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --job-id ${{ needs.job_run_and_status_az.outputs.job_id }} workflow_list_az: @@ -686,3 +683,52 @@ jobs: --params-file "$PARAMS_FILE" \ --execution-platform "azure" \ --wait-completion + interactive_session_list: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . 
+ - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_AZURE }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_AZURE }} + CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" + run: | + echo q |cloudos interactive-session list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID + interactive_session_create: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . + - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_AZURE }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_AZURE }} + CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" + PROJECT_NAME: "cloudos-cli-tests" + INSTANCE_TYPE: "Standard_D4as_v4" + run: | + cloudos interactive-session create --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --project-name "$PROJECT_NAME" --name ci_test_cli --session-type jupyter --shutdown-in 10m --execution-platform azure --instance $INSTANCE_TYPE + diff --git a/.github/workflows/ci_dev.yml b/.github/workflows/ci_dev.yml index 683c1256..f7471f6d 100644 --- a/.github/workflows/ci_dev.yml +++ b/.github/workflows/ci_dev.yml @@ -167,7 +167,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_DEV }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_DEV }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_gitlab --workflow-url https://gitlab.com/lifebit-ai/spammer-nf --repository-platform gitlab import_github_dev: @@ -191,7 +190,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_DEV }} 
CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_DEV }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos workflow import --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --workflow-name imported_from_github --workflow-url https://github.com/lifebit-ai/spammer-nf --repository-platform github job_run_and_status_dev: @@ -289,7 +287,6 @@ jobs: CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_DEV }} CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_DEV }} CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" - GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} run: | cloudos job ${{ matrix.feature }} --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --job-id ${{ needs.job_run_and_status_dev.outputs.job_id }} workflow_list_dev: @@ -906,3 +903,50 @@ jobs: --instance-type m4.xlarge \ --params-file "$PARAMS_FILE" \ --wait-completion + interactive_session_list: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.9" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . 
+ - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_DEV }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_DEV }} + CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" + run: | + echo q |cloudos interactive-session list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID + interactive_session_create: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . + - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_DEV }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_DEV }} + CLOUDOS_URL: "https://dev.sdlc.lifebit.ai" + PROJECT_NAME: "cloudos-cli-tests" + run: | + cloudos interactive-session create --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --project-name "$PROJECT_NAME" --name ci_test_cli --session-type jupyter --shutdown-in 10m diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e3aee25..99da5c72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.83.0 (2026-03-18) + +### Feat + +- Adds interactive session class +- Adds interactive session listing +- Adds interactive session creation + ## v2.82.1 (2026-03-11) ### Patch diff --git a/README.md b/README.md index 8dde9eaa..7155eda5 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ Python package for interacting with CloudOS - [Custom Script Path](#custom-script-path) - [Custom Script Project](#custom-script-project) - [Use multiple projects for files in `--parameter` option](#use-multiple-projects-for-files-in---parameter-option) + - [Interactive Sessions](#interactive-sessions) + - [List Interactive Sessions](#list-interactive-sessions) + - 
[Create Interactive Session](#create-interactive-session) - [Datasets](#datasets) - [List Files](#list-files) - [Move Files](#move-files) @@ -276,7 +279,14 @@ To generate a named profile, use the following command: cloudos configure --profile {profile-name} ``` -The same prompts will appear. If a profile with the same name already exists, the current parameters will appear in square brackets and can be overwritten or left unchanged by pressing Enter/Return. +The same prompts will appear, including the execution platform (aws or azure). If a profile with the same name already exists, the current parameters will appear in square brackets and can be overwritten or left unchanged by pressing Enter/Return. + +When configuring a profile, you can specify: +- **API Key**: Your CloudOS API credentials +- **CloudOS URL**: The CloudOS instance URL +- **Project Name**: Default project for commands +- **Execution Platform**: `aws` (default) or `azure` - determines default instance types and available features +- **Repository Platform**: Version control system (github, gitlab, etc.) > [!NOTE] > When there is already at least 1 previous profile defined, a new question will appear asking to make the current profile as default @@ -1932,6 +1942,237 @@ will take all `csv` file extensions in the specified folder. --- + + +### Interactive Sessions + +Interactive sessions allow you to work within the platform using different virtual environments (Jupyter Notebooks, RStudio, VS Code, etc.). You can list, monitor, and manage your interactive sessions using the CLI. + +#### List Interactive Sessions + +You can get a list of all interactive sessions in your workspace by running `cloudos interactive-session list`. 
The command can produce three different output formats that can be selected using the `--output-format` option: + +- **stdout** (default): Displaysa table directly in the terminal with interactive pagination +- **csv**: Saves session data to a CSV file with a minimum predefined set of columns by default, or all available columns using the `--all-fields` parameter +- **json**: Saves complete session information to a JSON file with all available fields + +To display the list of interactive sessions as a formatted table in the terminal: + +```bash +cloudos interactive-session list --profile my_profile +# or explicitly: +cloudos interactive-session list --profile my_profile --output-format stdout +``` + +The table displays sessions with pagination controls (press `n` for next page, `p` for previous page, or `q` to quit): + +```console + Interactive Sessions +┏━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┓ +┃ Status ┃ Name ┃ Type ┃ ID ┃ Owner ┃ +┡━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━┩ +│ stopped │ cloudosR │ awsRstudio │ 69aee0dba197… │ Leila │ +│ running │ analysis-dev │ awsJupyterNotebook │ 69ae972a18f0… │ John │ +│ stopped │ test_session │ awsVSCode │ 69a996c098ab… │ James │ +└─────────┴──────────────┴────────────────────┴───────────────┴────────┘ + +Total sessions: 15 +Page: 1 of 3 +Sessions on this page: 5 + +n = next, p = prev, q = quit +``` + +To save sessions to a CSV file with all available fields: + +```bash +cloudos interactive-session list --profile my_profile --output-format csv --all-fields +``` + +The expected output is something similar to: + +```console +Interactive session list saved to interactive_sessions_list.csv +``` + +To save the same information in JSON format: + +```bash +cloudos interactive-session list --profile my_profile --output-format json +``` + +```console +Interactive session list collected with a total of 15 sessions. 
+Interactive session list saved to interactive_sessions_list.json +``` + +**Filtering Options** + +You can filter sessions by status and other criteria: + +```bash +# Filter by status (setup, initialising, running, scheduled, stopped) +cloudos interactive-session list --profile my_profile --filter-status running + +# Show only your own sessions +cloudos interactive-session list --profile my_profile --filter-only-mine + +# Include archived sessions +cloudos interactive-session list --profile my_profile --archived + +# Custom pagination +cloudos interactive-session list --profile my_profile --limit 20 --page 2 +``` + +**Table Columns** + +You can customize which columns to display: + +```bash +# Display specific columns +cloudos interactive-session list --profile my_profile --table-columns "status,name,owner,project,created_at,cost" +``` + +Available columns: `backend`, `cost`, `cost_limit`, `created_at`, `id`, `instance`, `name`, `owner`, `project`, `resources`, `runtime`, `saved_at`, `spot`, `status`, `time_left`, `type`, `version` + + +#### Create Interactive Session + +You can create and start a new interactive session using the `cloudos interactive-session create` command. This command provisions a new virtual environment with your specified configuration. + +The command automatically loads API credentials and workspace information from your profile configuration, so you only need to specify the session-specific details. 
+ +**Basic Usage** + +Create a simple Jupyter notebook session: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "My Analysis" \ + --session-type jupyter +``` + +Create an RStudio session with specific R version: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "R Analysis" \ + --session-type rstudio \ + --r-version 4.4.2 +``` + +Create a VS Code session (AWS only): + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Development" \ + --session-type vscode +``` + +Create a Spark cluster session with custom instance types (AWS only): + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Spark Analysis" \ + --session-type spark \ + --spark-master c5.2xlarge \ + --spark-core c5.xlarge \ + --spark-workers 3 +``` + +**Configuration Options** + +You can customize your session with various options: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Advanced Session" \ + --session-type jupyter \ + --instance c5.2xlarge \ + --storage 1000 \ + --spot \ + --shared \ + --cost-limit 50.0 \ + --shutdown-in 8h +``` + +**Required for Each Session:** +- `--name`: Session name (1-100 characters) +- `--session-type`: Type of backend - `jupyter`, `vscode`, `rstudio`, or `spark` (platform dependent) + +**Optional Configuration:** +- `--instance`: Instance type (default depends on execution platform: `c5.xlarge` for AWS, `Standard_F1s` for Azure) +- `--storage`: Storage in GB (default: 500, range: 100-5000) +- `--spot`: Use spot instances (AWS only, cost-saving) +- `--shared`: Make session accessible to workspace members +- `--cost-limit`: Compute cost limit in USD (default: -1 for unlimited) +- `--shutdown-in`: Auto-shutdown duration (e.g., `8h`, `2d`, `30m`) + +**Data & Storage Management:** +- `--mount`: Mount a data file into the session. Supports both CloudOS datasets and S3 files (AWS only). 
Format: `project_name/dataset_path` (e.g., `leila-test/Data/file.csv`) or `s3://bucket/path/to/file` (e.g., `s3://my-bucket/data/file.csv`). Can be used multiple times. +- `--link`: Link a folder into the session for read/write access (AWS only). Supports S3 folders and CloudOS folders. Format: `s3://bucket/prefix` (e.g., `s3://my-bucket/data/`) or `project_name/folder_path` (e.g., `leila-test/AnalysesResults/analysis_id/results`). Can be used multiple times. **Note:** Linking is not supported on Azure. Use CloudOS file explorer for data access. + +**Backend-Specific:** +- `--r-version`: R version for RStudio (options: `4.4.2`, `4.5.2`) - **optional for rstudio** (default: `4.4.2`) +- `--spark-master`: Master instance type for Spark (default: `c5.2xlarge`) +- `--spark-core`: Core instance type for Spark (default: `c5.xlarge`) +- `--spark-workers`: Initial worker count for Spark (default: 1) +- `--verbose`: Show detailed progress messages + + +**Data Management** + +CloudOS CLI supports multiple ways to access data in interactive sessions, depending on your execution platform: +- you can load data directly into the session (i.e. files are copied into the session's mounted-data volume) +- you can link folders to your session (i.e the folders are sym-linked to the session). This works only for folders (S3-based) and only in AWS enviornments. 
+ + +**Data Mounting Examples** + +Mount a file from File Explorer: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "Data Analysis" \ + --session-type jupyter \ + --mount "my_project/training_data.csv" +``` + +Link an S3 bucket: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "S3 Access" \ + --session-type jupyter \ + --link "s3://my-results-bucket/output/" +``` + + +**Output Display** + +The session creation output displays a success message with session details: + +```bash +cloudos interactive-session create \ + --profile my_profile \ + --name "My Session" \ + --session-type jupyter +``` + +The output shows the session details including: +- Session ID +- Session name +- Backend type (jupyter, vscode, rstudio, spark) +- Current status (scheduled, initialising, setup, running, stopped) + + ### Datasets Manage files and folders within your CloudOS File Explorer programmatically. These commands provide comprehensive file management capabilities for organizing research data and results. 
diff --git a/cloudos_cli/__init__.py b/cloudos_cli/__init__.py index cd923cb5..1a6b6ebe 100755 --- a/cloudos_cli/__init__.py +++ b/cloudos_cli/__init__.py @@ -8,4 +8,4 @@ from .clos import Cloudos from ._version import __version__ -__all__ = ['jobs', 'utils', 'clos', 'queue', 'configure', 'datasets', 'import_wf'] +__all__ = ['jobs', 'utils', 'clos', 'queue', 'configure', 'datasets', 'import_wf', 'interactive_session'] diff --git a/cloudos_cli/__main__.py b/cloudos_cli/__main__.py index ae89133b..f0963c81 100644 --- a/cloudos_cli/__main__.py +++ b/cloudos_cli/__main__.py @@ -25,6 +25,7 @@ from cloudos_cli.datasets.cli import datasets from cloudos_cli.configure.cli import configure from cloudos_cli.link.cli import link +from cloudos_cli.interactive_session.cli import interactive_session # Install the custom exception handler @@ -63,6 +64,7 @@ def run_cloudos_cli(ctx): run_cloudos_cli.add_command(datasets) run_cloudos_cli.add_command(configure) run_cloudos_cli.add_command(link) +run_cloudos_cli.add_command(interactive_session) if __name__ == '__main__': run_cloudos_cli() diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index c03de08a..23b8a670 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.82.1' +__version__ = '2.83.0' diff --git a/cloudos_cli/clos.py b/cloudos_cli/clos.py index f6a9f0a0..55e68ffc 100644 --- a/cloudos_cli/clos.py +++ b/cloudos_cli/clos.py @@ -2252,3 +2252,208 @@ def workflow_content_query(self, workspace_id, workflow_name, verify=True, query # use 'query' to look in the content return [wf.get(query) for wf in content.get("workflows", []) if wf.get("name") == workflow_name] + + def get_interactive_session_list(self, team_id, page=None, limit=None, status=None, + owner_only=False, include_archived=False, verify=True): + """Get interactive sessions from a CloudOS team. + + Parameters + ---------- + team_id : string + The CloudOS team id (workspace id) to retrieve sessions from. 
+ page : int, optional + Page number for pagination. Default=1. + limit : int, optional + Number of results per page. Default=10, max=100. + status : list of string, optional + Filter by session status. Valid values: running, stopped, provisioning, scheduled. + owner_only : bool, optional + If True, retrieve only the current user's sessions. + include_archived : bool, optional + If True, include archived sessions in results. + verify: [bool|string], default=True + Whether to use SSL verification or not. Alternatively, if + a string is passed, it will be interpreted as the path to + the SSL certificate file. + + Returns + ------- + dict + A dict with 'sessions' list and 'pagination_metadata'. + """ + # Validate team_id + if not team_id or not isinstance(team_id, str): + raise ValueError("Invalid team_id: must be a non-empty string") + + # Set defaults + current_page = page if page is not None else 1 + current_limit = limit if limit is not None else 10 + + # Validate pagination parameters + if current_page <= 0 or not isinstance(current_page, int): + raise ValueError('Please use a positive integer (>= 1) for the page parameter') + if current_limit <= 0 or not isinstance(current_limit, int): + raise ValueError('Please use a positive integer (>= 1) for the limit parameter') + if current_limit > 100: + raise ValueError('Limit cannot exceed 100') + + headers = { + "Content-type": "application/json", + "apikey": self.apikey + } + + # Build query parameters + params = { + "teamId": team_id, + "page": current_page, + "limit": current_limit + } + + # Add optional filters + if status: + # status is a list of valid status values (user-friendly names) + # Include both spellings and API names for flexibility + valid_statuses = ['setup', 'initialising', 'initializing', 'running', 'scheduled', 'stopped', 'aborted'] + for s in status: + if s.lower() not in valid_statuses: + raise ValueError(f"Invalid status '{s}'. 
Valid values: {', '.join(valid_statuses)}") + # Map user-friendly status names to API status names + # The API uses various names: 'ready' and 'aborted' but we display them as 'running' and 'stopped' to users + status_mapping = { + 'setup': 'setup', + 'initialising': 'initialising', + 'initializing': 'initialising', # Accept both spellings + 'running': 'ready', # API uses 'ready' for running sessions + 'scheduled': 'scheduled', + 'stopped': 'aborted', + 'aborted': 'aborted' # Also accept 'aborted' as input + } + mapped_statuses = [status_mapping[s.lower()] for s in status] + # Add status[] parameters (multiple status filters) + # requests library will convert list to multiple params with same name + params["status[]"] = mapped_statuses + + if owner_only: + params["onlyOwnerSessions"] = "true" + + if include_archived: + params["archived.status"] = "true" + else: + params["archived.status"] = "false" + + # Make the API request + url = f"{self.cloudos_url}/api/v3/interactive-sessions" + r = retry_requests_get(url, params=params, headers=headers, verify=verify) + + if r.status_code >= 400: + raise BadRequestException(r) + + content = r.json() + + # Extract sessions and pagination metadata + # The API returns sessions under 'sessions' key + sessions = content.get('sessions', []) if isinstance(content, dict) else [] + + # Build pagination metadata from response + # API returns Pagination-Count, Pagination-Page, Pagination-Limit + pagination_info = content.get('paginationMetadata', {}) + total_count = pagination_info.get('Pagination-Count', len(sessions)) + + pagination_metadata = { + 'count': total_count, + 'page': current_page, + 'limit': current_limit, + 'totalPages': (total_count + current_limit - 1) // current_limit if current_limit > 0 else 1 + } + + return {'sessions': sessions, 'pagination_metadata': pagination_metadata} + + def create_interactive_session(self, team_id, payload, verify=True): + """Create and start a new interactive session. 
+ + Parameters + ---------- + team_id : string + The CloudOS team id (workspace id) to create session in. + payload : dict + Complete session creation payload with configuration, data items, etc. + verify: [bool|string], default=True + Whether to use SSL verification or not. Alternatively, if + a string is passed, it will be interpreted as the path to + the SSL certificate file. + + Returns + ------- + dict + Session object from API response with _id, status, and all configuration. + """ + # Validate team_id + if not team_id or not isinstance(team_id, str): + raise ValueError("Invalid team_id: must be a non-empty string") + + headers = { + "Content-type": "application/json", + "apikey": self.apikey + } + + # Build URL with teamId query parameter + url = f"{self.cloudos_url}/api/v1/interactive-sessions?teamId={team_id}" + + # Make the API request with POST method + try: + r = retry_requests_post( + url, + headers=headers, + data=json.dumps(payload), + verify=verify, + timeout=30 + ) + except Exception as e: + raise Exception(f"Failed to create interactive session: {str(e)}") + + if r.status_code >= 400: + raise BadRequestException(r) + + # Return the full session object from response + content = r.json() + return content + + ## FOR FUTURE COMMANDS IMPLEMENTATION + # def get_interactive_session(self, team_id, session_id, verify=True): + # """Get details of a specific interactive session. + + # Parameters + # ---------- + # team_id : string + # The CloudOS team id (workspace id). + # session_id : string + # The interactive session id (MongoDB ObjectId). + # verify: [bool|string], default=True + # Whether to use SSL verification or not. + + # Returns + # ------- + # dict + # Session object with current status and full details. 
+ # """ + # if not team_id or not isinstance(team_id, str): + # raise ValueError("Invalid team_id: must be a non-empty string") + + # if not session_id or not isinstance(session_id, str): + # raise ValueError("Invalid session_id: must be a non-empty string") + + # headers = { + # "Content-type": "application/json", + # "apikey": self.apikey + # } + + # # Build URL for getting specific session + # url = f"{self.cloudos_url}/api/v2/interactive-sessions/{session_id}?teamId={team_id}" + + # r = retry_requests_get(url, headers=headers, verify=verify) + + # if r.status_code >= 400: + # raise BadRequestException(r) + + # content = r.json() + # return content diff --git a/cloudos_cli/configure/configure.py b/cloudos_cli/configure/configure.py index 1c816d37..b84af227 100644 --- a/cloudos_cli/configure/configure.py +++ b/cloudos_cli/configure/configure.py @@ -2,6 +2,10 @@ from pathlib import Path import configparser import click +import functools +import inspect +import sys +from rich.console import Console from cloudos_cli.logging.logger import update_command_context_from_click from cloudos_cli.constants import CLOUDOS_URL, INIT_PROFILE @@ -649,7 +653,6 @@ def job_details(ctx, apikey, workspace_id, job_id, ...): function Decorated function with automatic profile configuration loading. 
""" - import functools if required_params is None: required_params = [] @@ -657,8 +660,6 @@ def job_details(ctx, apikey, workspace_id, job_id, ...): def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): - import inspect - # Get context from args or kwargs ctx = kwargs.get('ctx') or (args[0] if args and isinstance(args[0], click.Context) else None) @@ -722,7 +723,6 @@ def wrapper(*args, **kwargs): # Only update kwargs with parameters that the function actually accepts # AND that were not explicitly provided by the user on the command line # AND that have a meaningful value from the profile (not None) - import sys for key, value in user_options.items(): if key in func_params and value is not None: # Check if the parameter was provided via command line @@ -809,8 +809,6 @@ def get_shared_config(): >>> shared_config = get_shared_config() >>> ctx.default_map = build_default_map_for_group(run_cloudos_cli, shared_config) """ - from rich.console import Console - config_manager = ConfigurationProfile() profile_to_use = config_manager.determine_default_profile() diff --git a/cloudos_cli/datasets/cli.py b/cloudos_cli/datasets/cli.py index 8d365911..10c44a77 100644 --- a/cloudos_cli/datasets/cli.py +++ b/cloudos_cli/datasets/cli.py @@ -166,8 +166,6 @@ def list_files(ctx, # Output handling if output_format == 'csv': - import csv - csv_filename = f'{output_basename}.csv' if details: diff --git a/cloudos_cli/interactive_session/__init__.py b/cloudos_cli/interactive_session/__init__.py new file mode 100644 index 00000000..1e1d8298 --- /dev/null +++ b/cloudos_cli/interactive_session/__init__.py @@ -0,0 +1 @@ +"""CloudOS interactive session module.""" diff --git a/cloudos_cli/interactive_session/cli.py b/cloudos_cli/interactive_session/cli.py new file mode 100644 index 00000000..0956cc33 --- /dev/null +++ b/cloudos_cli/interactive_session/cli.py @@ -0,0 +1,625 @@ +"""CLI commands for CloudOS interactive session management.""" + +import rich_click as click +import 
json +from cloudos_cli.clos import Cloudos +from cloudos_cli.datasets import Datasets +from cloudos_cli.utils.errors import BadRequestException +from cloudos_cli.utils.resources import ssl_selector +from cloudos_cli.interactive_session.interactive_session import ( + create_interactive_session_list_table, + process_interactive_session_list, + save_interactive_session_list_to_csv, + parse_shutdown_duration, + parse_data_file, + parse_link_path, + build_session_payload, + format_session_creation_table, + resolve_data_file_id, + validate_instance_type +) +from cloudos_cli.configure.configure import with_profile_config, CLOUDOS_URL +from cloudos_cli.utils.cli_helpers import pass_debug_to_subcommands +from cloudos_cli.utils.requests import retry_requests_get + + +# Create the interactive_session group +@click.group(cls=pass_debug_to_subcommands()) +def interactive_session(): + """CloudOS interactive session functionality: list and manage interactive sessions.""" + print(interactive_session.__doc__ + '\n') + + +@interactive_session.command('list') +@click.option('-k', + '--apikey', + help='Your CloudOS API key', + required=True) +@click.option('-c', + '--cloudos-url', + help=(f'The CloudOS url you are trying to access to. Default={CLOUDOS_URL}.'), + default=CLOUDOS_URL, + required=True) +@click.option('--workspace-id', + help='The specific CloudOS workspace id.', + required=True) +@click.option('--filter-status', + multiple=True, + type=click.Choice(['setup', 'initialising', 'running', 'scheduled', 'stopped'], case_sensitive=False), + help='Filter sessions by status. Can be specified multiple times to filter by multiple statuses.') +@click.option('--limit', + type=int, + default=10, + help='Number of results per page. Default=10, max=100.') +@click.option('--page', + type=int, + default=1, + help='Page number to retrieve. 
Default=1.') +@click.option('--filter-only-mine', + is_flag=True, + help='Show only the current user\'s sessions.') +@click.option('--archived', + is_flag=True, + help='When this flag is used, only archived sessions list is collected.') +@click.option('--output-format', + help='Output format for session list.', + type=click.Choice(['stdout', 'csv', 'json'], case_sensitive=False), + default='stdout') +@click.option('--output-basename', + help=('Output file base name to save sessions list. ' + + 'Default=interactive_sessions_list'), + default='interactive_sessions_list', + required=False) +@click.option('--table-columns', + help=('Comma-separated list of columns to display in the table. Only applicable when --output-format=stdout. ' + + 'Available columns: backend, cost, cost_limit, created_at, id, instance, name, owner, project, resources, runtime, saved_at, spot, status, time_left, type, version. ' + + 'Default: responsive (auto-selects columns based on terminal width)'), + default=None) +@click.option('--all-fields', + help=('Whether to collect all available fields from sessions or ' + + 'just the preconfigured selected fields. Only applicable ' + + 'when --output-format=csv.'), + is_flag=True) +@click.option('--verbose', + help='Whether to print information messages or not.', + is_flag=True) +@click.option('--disable-ssl-verification', + help=('Disable SSL certificate verification. 
Please, remember that this option is ' + + 'not generally recommended for security reasons.'), + is_flag=True) +@click.option('--ssl-cert', + help='Path to your SSL certificate file.') +@click.option('--profile', help='Profile to use from the config file', default=None) +@click.pass_context +@with_profile_config(required_params=['apikey', 'workspace_id']) +def list_sessions(ctx, + apikey, + cloudos_url, + workspace_id, + filter_status, + limit, + page, + filter_only_mine, + archived, + output_format, + output_basename, + table_columns, + all_fields, + verbose, + disable_ssl_verification, + ssl_cert, + profile): + """List interactive sessions for a CloudOS team.""" + # apikey, cloudos_url, and team_id are now automatically resolved by the decorator + + verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) + + # Validate limit parameter + if not isinstance(limit, int) or limit < 1: + raise ValueError('Please use a positive integer (>= 1) for the --limit parameter') + + if limit > 100: + click.secho('Error: Limit cannot exceed 100. 
Please use --limit with a value <= 100', fg='red', err=True) + raise SystemExit(1) + + # Validate page parameter + if not isinstance(page, int) or page < 1: + raise ValueError('Please use a positive integer (>= 1) for the --page parameter') + + # Validate table columns if specified + valid_columns = {'id', 'name', 'status', 'type', 'instance', 'cost', 'owner', 'project', + 'created_at', 'runtime', 'saved_at', 'resources', 'backend', 'version', + 'spot', 'cost_limit', 'time_left'} + selected_columns = table_columns + + if selected_columns: + # Parse columns (split by comma and strip whitespace) + col_list = [col.strip() for col in selected_columns.split(',')] + invalid_cols = [col for col in col_list if col not in valid_columns] + + if invalid_cols: + click.secho(f'Error: Invalid column(s): {", ".join(invalid_cols)}', fg='red', err=True) + click.secho(f'Valid columns: {", ".join(sorted(valid_columns))}', fg='yellow', err=True) + click.secho(f'\nTip: Use --help without other options to see command help', fg='cyan', err=True) + raise SystemExit(1) + + # Prepare output file if needed + if output_format != 'stdout': + outfile = output_basename + '.' 
+ output_format + + if verbose: + print('Executing list...') + print('\t...Preparing objects') + + cl = Cloudos(cloudos_url, apikey, None) + + if verbose: + print('\tThe following Cloudos object was created:') + print('\t' + str(cl) + '\n') + print('\tSearching for interactive sessions in the following workspace: ' + f'{workspace_id}') + + try: + # Call the API method to get interactive sessions + result = cl.get_interactive_session_list( + workspace_id, + page=page, + limit=limit, + status=list(filter_status) if filter_status else None, + owner_only=filter_only_mine, + include_archived=archived, + verify=verify_ssl + ) + + sessions = result.get('sessions', []) + pagination_metadata = result.get('pagination_metadata', None) + + # Define callback function for fetching additional pages + def fetch_page(page_num): + """Fetch a specific page of interactive sessions.""" + return cl.get_interactive_session_list( + workspace_id, + page=page_num, + limit=limit, + status=list(filter_status) if filter_status else None, + owner_only=filter_only_mine, + include_archived=archived, + verify=verify_ssl + ) + + # Handle empty results + if len(sessions) == 0: + if filter_status: + # Show helpful message when filtering returns no results + status_flow = 'scheduled → initialising → setup → running → stopped' + click.secho(f'No interactive sessions found in the requested status.', fg='yellow', err=True) + click.secho(f'Session status flow: {status_flow}', fg='cyan', err=True) + elif output_format == 'stdout': + create_interactive_session_list_table([], pagination_metadata, selected_columns, page_size=limit, fetch_page_callback=fetch_page) + else: + print('A total of 0 interactive sessions collected.') + + # Display results based on output format + elif output_format == 'stdout': + create_interactive_session_list_table(sessions, pagination_metadata, selected_columns, page_size=limit, fetch_page_callback=fetch_page) + + elif output_format == 'csv': + sessions_df = 
process_interactive_session_list(sessions, all_fields) + save_interactive_session_list_to_csv(sessions_df, outfile, count=len(sessions)) + + elif output_format == 'json': + with open(outfile, 'w') as o: + o.write(json.dumps(sessions, indent=2)) + print(f'\tInteractive session list collected with a total of {len(sessions)} sessions on this page.') + print(f'\tInteractive session list saved to {outfile}') + + else: + raise ValueError('Unrecognised output format. Please use one of [stdout|csv|json]') + + except BadRequestException as e: + error_str = str(e) + # Check if the error is related to authentication + if '401' in error_str or 'Unauthorized' in error_str: + click.secho(f'Error: Failed to retrieve interactive sessions. Please check your credentials (API key and CloudOS URL).', fg='red', err=True) + raise SystemExit(1) + # Check if the error is related to status filtering + elif filter_status and ('400' in error_str or 'Invalid' in error_str): + status_flow = 'scheduled → initialising → setup → running → stopped' + click.secho(f'No interactive sessions found in the requested status.', fg='yellow', err=True) + click.secho(f'Session status flow: {status_flow}', fg='cyan', err=True) + raise SystemExit(1) + else: + click.secho(f'Error: Failed to retrieve interactive sessions: {e}', fg='red', err=True) + raise SystemExit(1) + except Exception as e: + error_str = str(e) + # Check for DNS/connection errors + if 'Failed to resolve' in error_str or 'Name or service not known' in error_str or 'nodename nor servname provided' in error_str: + click.secho(f'Error: Unable to connect to CloudOS URL. Please verify the CloudOS URL is correct.', fg='red', err=True) + # Check for 401 Unauthorized + elif '401' in error_str or 'Unauthorized' in error_str: + click.secho(f'Error: Failed to retrieve interactive sessions. 
Please check your credentials (API key and CloudOS URL).', fg='red', err=True) + else: + click.secho(f'Error: {str(e)}', fg='red', err=True) + raise SystemExit(1) + + +@interactive_session.command('create') +@click.option('-k', + '--apikey', + help='Your CloudOS API key', + required=False) +@click.option('-c', + '--cloudos-url', + help=(f'The CloudOS url you are trying to access to. Default={CLOUDOS_URL}.'), + default=CLOUDOS_URL, + required=False) +@click.option('--workspace-id', + help='The specific CloudOS workspace id.', + required=False) +@click.option('--project-name', + help='The project name. Will be resolved to project ID automatically.', + required=True) +@click.option('--name', + help='Name for the interactive session (1-100 characters).', + required=True) +@click.option('--session-type', + type=click.Choice(['jupyter', 'vscode', 'spark', 'rstudio'], case_sensitive=False), + help='Type of interactive session.', + required=True) +@click.option('--instance', + help='Instance type (e.g., c5.xlarge for AWS, Standard_F1s for Azure). Default depends on execution platform.', + default=None) +@click.option('--storage', + type=int, + help='Storage in GB (100-5000). Default=500.', + default=500) +@click.option('--spot', + is_flag=True, + help='Use spot instances.') +@click.option('--shared', + is_flag=True, + help='Make session shared (accessible to workspace).') +@click.option('--cost-limit', + type=float, + help='Cost limit in USD. Default=-1 (unlimited).', + default=-1) +@click.option('--shutdown-in', + help='Auto-shutdown duration (e.g., 8h, 2d).') +@click.option('--mount', + multiple=True, + help='Mount a data file into the session. Supports both CloudOS datasets and S3 files. Format: project_name/dataset_path (e.g., leila-test/Data/file.csv) or s3://bucket/path/to/file (e.g., s3://my-bucket/data/file.csv). Can be used multiple times.') +@click.option('--link', + multiple=True, + help='Link a folder into the session for read/write access. 
Supports S3 folders and CloudOS folders. Format: s3://bucket/prefix (e.g., s3://my-bucket/data/) or project_name/folder_path (e.g., leila-test/Data). Legacy format: mountName:bucketName:s3Prefix. Can be used multiple times.') +@click.option('--r-version', + type=click.Choice(['4.5.2', '4.4.2'], case_sensitive=False), + help='R version for RStudio. Options: 4.5.2 (default), 4.4.2.', + default='4.5.2') +@click.option('--spark-master', + help='Master instance type for Spark. Default=c5.2xlarge.', + default='c5.2xlarge') +@click.option('--spark-core', + help='Core instance type for Spark. Default=c5.xlarge.', + default='c5.xlarge') +@click.option('--spark-workers', + type=int, + help='Initial worker count for Spark. Default=1.', + default=1) +@click.option('--execution-platform', + type=click.Choice(['aws', 'azure'], case_sensitive=False), + help='Cloud execution platform (aws or azure). Default is obtained from profile.', + default=None) +@click.option('--disable-ssl-verification', + help=('Disable SSL certificate verification. 
Please, remember that this option is ' + + 'not generally recommended for security reasons.'), + is_flag=True) +@click.option('--ssl-cert', + help='Path to your SSL certificate file.') +@click.option('--profile', help='Profile to use from the config file', default=None) +@click.option('--verbose', + help='Whether to print information messages or not.', + is_flag=True) +@click.pass_context +@with_profile_config(required_params=['apikey', 'workspace_id', 'project_name']) +def create_session(ctx, + apikey, + cloudos_url, + workspace_id, + project_name, + name, + session_type, + instance, + storage, + spot, + shared, + cost_limit, + shutdown_in, + mount, + link, + r_version, + spark_master, + spark_core, + spark_workers, + execution_platform, + disable_ssl_verification, + ssl_cert, + profile, + verbose): + """Create a new interactive session.""" + + verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert) + + # Default execution_platform to 'aws' if not specified by user or profile + if execution_platform is None: + execution_platform = 'aws' + else: + # Normalize to lowercase + execution_platform = execution_platform.lower() + + # Set instance default based on execution_platform if not specified + if instance is None: + instance = 'c5.xlarge' if execution_platform == 'aws' else 'Standard_F1s' + + # Validate instance type format + is_valid, error_msg = validate_instance_type(instance, execution_platform) + if not is_valid: + click.secho(f'Error: {error_msg}', fg='red', err=True) + click.secho(f'Hint: Check your instance type spelling and format for {execution_platform.upper()}.', fg='yellow', err=True) + raise SystemExit(1) + + # Validate Spark instance types if session type is spark + if session_type.lower() == 'spark': + # Spark is AWS only, so use 'aws' for validation + is_valid_master, error_msg_master = validate_instance_type(spark_master, 'aws') + if not is_valid_master: + click.secho(f'Error: Invalid Spark master instance type: {error_msg_master}', 
fg='red', err=True) + raise SystemExit(1) + + is_valid_core, error_msg_core = validate_instance_type(spark_core, 'aws') + if not is_valid_core: + click.secho(f'Error: Invalid Spark core instance type: {error_msg_core}', fg='red', err=True) + raise SystemExit(1) + + if verbose: + print('Executing create interactive session...') + print('\t...Preparing objects') + + cl = Cloudos(cloudos_url, apikey, None) + + if verbose: + print('\tThe following Cloudos object was created:') + print('\t' + str(cl) + '\n') + print(f'\tCreating interactive session in workspace: {workspace_id}') + + try: + # Resolve project name to project ID + project_id = cl.get_project_id_from_name(workspace_id, project_name, verify=verify_ssl) + if verbose: + print(f'\tResolved project name "{project_name}" to ID: {project_id}') + + # Parse session type to lowercase + session_type_lower = session_type.lower() + + # Map session type to backend name + backend_type_mapping = { + 'jupyter': 'regular', + 'vscode': 'vscode', + 'spark': 'spark', + 'rstudio': 'rstudio' + } + backend_type = backend_type_mapping.get(session_type_lower) + + if not backend_type: + click.secho(f'Error: Invalid session type: {session_type}', fg='red', err=True) + raise SystemExit(1) + + # Parse shutdown duration + shutdown_at_parsed = None + if shutdown_in: + try: + shutdown_at_parsed = parse_shutdown_duration(shutdown_in) + except ValueError as e: + click.secho(f'Error: Invalid shutdown duration: {str(e)}', fg='red', err=True) + raise SystemExit(1) + + # Parse and resolve mounted data files (both CloudOS and S3) + parsed_data_files = [] + parsed_s3_mounts = [] # S3 folders go into FUSE mounts + if mount: + try: + for df in mount: + parsed = parse_data_file(df) + + if parsed['type'] == 's3': + # S3 files are only supported on AWS + if execution_platform != 'aws': + click.secho(f'Error: S3 mounts are only supported on AWS. 
Use CloudOS file explorer paths for Azure.', fg='red', err=True) + raise SystemExit(1) + + # S3 file: add to dataItems as S3File type + if verbose: + print(f'\tMounting S3 file: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') + + # Use the full path as the name + s3_file_item = { + "type": "S3File", + "data": { + "name": parsed["s3_prefix"], + "s3BucketName": parsed["s3_bucket"], + "s3ObjectKey": parsed["s3_prefix"] + } + } + parsed_data_files.append(s3_file_item) + + if verbose: + print(f'\t ✓ Added S3 file to mount') + + else: # type == 'cloudos' + # CloudOS dataset file: resolve via Datasets API + data_project = parsed['project_name'] + dataset_path = parsed['dataset_path'] + + if verbose: + print(f'\tResolving dataset: {data_project}/{dataset_path}') + + # Create a Datasets API instance for this specific project + datasets_api = Datasets( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=data_project, + verify=verify_ssl, + cromwell_token=None + ) + + resolved = resolve_data_file_id(datasets_api, dataset_path) + parsed_data_files.append(resolved) + + if verbose: + print(f'\t ✓ Resolved to file ID: {resolved["item"]}') + except Exception as e: + click.secho(f'Error: Failed to resolve dataset files: {str(e)}', fg='red', err=True) + raise SystemExit(1) + + # Parse and add linked folders from --link (S3 or CloudOS) + for link_path in link: + try: + # Block all linking on Azure platforms + if execution_platform == 'azure': + click.secho(f'Error: Linking folders is not supported on Azure. 
Please use `cloudos interactive-session create --mount` to load your data in the session.', fg='red', err=True) + raise SystemExit(1) + + parsed = parse_link_path(link_path) + + if parsed['type'] == 's3': + # S3 folders are only supported on AWS (additional safeguard) + if execution_platform != 'aws': + click.secho(f'Error: S3 links are only supported on AWS execution platform.', fg='red', err=True) + raise SystemExit(1) + + # S3 folder: create S3Folder FUSE mount + if verbose: + print(f'\tLinking S3: s3://{parsed["s3_bucket"]}/{parsed["s3_prefix"]}') + + # Use bucket name or mount_name if provided (legacy format) + mount_name = parsed.get('mount_name', f"{parsed['s3_bucket']}-mount") + s3_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": parsed["s3_bucket"], + "s3Prefix": parsed["s3_prefix"] + } + } + parsed_s3_mounts.append(s3_mount_item) + + if verbose: + print(f'\t ✓ Linked S3: {mount_name}') + + else: # type == 'cloudos' + # CloudOS folder: resolve via Datasets API + folder_project = parsed['project_name'] + folder_path = parsed['folder_path'] + + if verbose: + print(f'\tLinking CloudOS folder: {folder_project}/{folder_path}') + + # Create Datasets API instance for this project + datasets_api = Datasets( + cloudos_url=cloudos_url, + apikey=apikey, + workspace_id=workspace_id, + project_name=folder_project, + verify=verify_ssl, + cromwell_token=None + ) + + # Get folder contents to verify it exists + folder_content = datasets_api.list_folder_content(folder_path) + + # For CloudOS folders, we create a mount item + mount_name = folder_path.split('/')[-1] if folder_path else folder_project + cloudos_mount_item = { + "type": "S3Folder", + "data": { + "name": mount_name, + "s3BucketName": folder_project, + "s3Prefix": folder_path + ("/" if folder_path and not folder_path.endswith('/') else "") + } + } + parsed_s3_mounts.append(cloudos_mount_item) + + if verbose: + print(f'\t ✓ Linked CloudOS folder: {mount_name}') + + except 
Exception as e: + click.secho(f'Error: Failed to link folder: {str(e)}', fg='red', err=True) + raise SystemExit(1) + + # Build the session payload + payload = build_session_payload( + name=name, + backend=backend_type, + execution_platform=execution_platform, + instance_type=instance, + storage_size=storage, + is_spot=spot, + is_shared=shared, + cost_limit=cost_limit, + shutdown_at=shutdown_at_parsed, + project_id=project_id, + data_files=parsed_data_files, + s3_mounts=parsed_s3_mounts if execution_platform == 'aws' else [], + r_version=r_version, + spark_master_type=spark_master, + spark_core_type=spark_core, + spark_workers=spark_workers + ) + + if verbose: + print('\tPayload constructed:') + print(json.dumps(payload, indent=2)) + + # Create the session via API + response = cl.create_interactive_session(workspace_id, payload, verify=verify_ssl) + + session_id = response.get('_id') + + if verbose: + print(f'\tSession created with ID: {session_id}') + + # Display session creation details in table format + format_session_creation_table( + response, + instance_type=instance, + storage_size=storage, + backend_type=backend_type, + r_version=r_version, + spark_master=spark_master, + spark_core=spark_core, + spark_workers=spark_workers, + data_files=parsed_data_files, + s3_mounts=parsed_s3_mounts + ) + + if verbose: + print('\tSession creation completed successfully!') + + except BadRequestException as e: + error_str = str(e) + if '401' in error_str or 'Unauthorized' in error_str: + click.secho(f'Error: Failed to create interactive session. 
Please check your credentials (API key and CloudOS URL).', fg='red', err=True) + else: + click.secho(f'Error: Failed to create interactive session: {e}', fg='red', err=True) + raise SystemExit(1) + except Exception as e: + error_str = str(e) + # Check for DNS/connection errors + if 'Failed to resolve' in error_str or 'Name or service not known' in error_str or 'nodename nor servname provided' in error_str: + click.secho(f'Error: Unable to connect to CloudOS URL. Please verify the CloudOS URL is correct.', fg='red', err=True) + # Check for 401 Unauthorized + elif '401' in error_str or 'Unauthorized' in error_str: + click.secho(f'Error: Failed to create interactive session. Please check your credentials (API key and CloudOS URL).', fg='red', err=True) + else: + click.secho(f'Error: {str(e)}', fg='red', err=True) + raise SystemExit(1) diff --git a/cloudos_cli/interactive_session/interactive_session.py b/cloudos_cli/interactive_session/interactive_session.py new file mode 100644 index 00000000..2996a1ae --- /dev/null +++ b/cloudos_cli/interactive_session/interactive_session.py @@ -0,0 +1,1263 @@ +"""Interactive session helper functions for CloudOS.""" + +import pandas as pd +import sys +import re +from datetime import datetime, timedelta, timezone +from rich.table import Table +from rich.console import Console + + +def validate_instance_type(instance_type, execution_platform='aws'): + """Validate instance type format for the given execution platform. + + Parameters + ---------- + instance_type : str + Instance type to validate + execution_platform : str + 'aws' or 'azure' + + Returns + ------- + tuple + (is_valid: bool, error_message: str or None) + """ + if not instance_type or not isinstance(instance_type, str): + return False, "Instance type must be a non-empty string" + + if execution_platform == 'aws': + # AWS EC2 instance format: . + # Examples: c5.xlarge, m5.2xlarge, r6i.large, t3.medium, g4dn.xlarge + # Family: c, m, r, t, g, p, i, d, x, z, h, etc. 
(1-4 chars) + # Generation: digit(s) optionally followed by letter(s) for variants + # Size: nano, micro, small, medium, large, xlarge, 2xlarge, 4xlarge, etc. + aws_pattern = r'^[a-z]{1,4}\d+[a-z]*\.(\d+)?(nano|micro|small|medium|large|xlarge|metal)$' + + if not re.match(aws_pattern, instance_type, re.IGNORECASE): + return False, (f"Invalid AWS instance type format: '{instance_type}'. " + f"Expected format: . (e.g., c5.xlarge, m5.2xlarge)") + + elif execution_platform == 'azure': + # Azure VM format: Standard__ or Basic_ + # Examples: Standard_F1s, Standard_D4as_v4, Standard_B2ms, Basic_A1 + azure_pattern = r'^(Standard|Basic)_[A-Z]\d+[a-z]*(_v\d+)?$' + + if not re.match(azure_pattern, instance_type): + return False, (f"Invalid Azure instance type format: '{instance_type}'. " + f"Expected format: Standard_ (e.g., Standard_F1s, Standard_D4as_v4)") + + else: + # Unknown platform - skip validation + return True, None + + return True, None + + +def _map_session_type_to_friendly_name(session_type): + """Map internal session type names to user-friendly display names. + + Parameters + ---------- + session_type : str + Internal session type (e.g., 'awsJupyterNotebook') + + Returns + ------- + str + User-friendly type name (e.g., 'Jupyter') + """ + type_mapping = { + 'awsJupyterNotebook': 'Jupyter', + 'azureJupyterNotebook': 'Jupyter', + 'awsVSCode': 'VS Code', + 'azureVSCode': 'VS Code', + 'awsRstudio': 'RStudio', + 'azureRstudio': 'RStudio', + 'awsSpark': 'Spark', + 'awsJupyterSparkNotebook': 'Spark', + 'azureJupyterSparkNotebook': 'Spark', + 'azureSpark': 'Spark', + 'awsRStudio': 'RStudio', # Handle both capitalizations + 'azureRStudio': 'RStudio' + } + return type_mapping.get(session_type, session_type) + + +def create_interactive_session_list_table(sessions, pagination_metadata=None, selected_columns=None, page_size=10, fetch_page_callback=None): + """Create a rich table displaying interactive sessions with interactive pagination. 
+ + Parameters + ---------- + sessions : list + List of session objects from the API + pagination_metadata : dict, optional + Pagination information from the API response + selected_columns : str or list, optional + Comma-separated string or list of column names to display. + If None, uses responsive column selection based on terminal width. + Available columns: id, name, status, type, instance, cost, owner + page_size : int, optional + Number of sessions per page for interactive pagination. Default=10. + fetch_page_callback : callable, optional + Callback function to fetch a specific page of results. + Should accept page number (1-indexed) and return dict with 'sessions' and 'pagination_metadata' keys. + """ + console = Console() + + # Define all available columns with their configuration + all_columns = { + 'id': { + 'header': 'ID', + 'style': 'cyan', + 'no_wrap': True, + 'max_width': 24, + 'accessor': '_id' + }, + 'name': { + 'header': 'Name', + 'style': 'green', + 'overflow': 'ellipsis', + 'max_width': 25, + 'accessor': 'name' + }, + 'status': { + 'header': 'Status', + 'style': 'yellow', + 'no_wrap': True, + 'max_width': 12, + 'accessor': 'status' + }, + 'type': { + 'header': 'Type', + 'style': 'magenta', + 'overflow': 'fold', + 'max_width': 20, + 'accessor': 'interactiveSessionType' + }, + 'instance': { + 'header': 'Instance', + 'style': 'cyan', + 'overflow': 'ellipsis', + 'max_width': 15, + 'accessor': 'resources.instanceType' + }, + 'cost': { + 'header': 'Cost', + 'style': 'green', + 'no_wrap': True, + 'max_width': 12, + 'accessor': 'totalCostInUsd' + }, + 'owner': { + 'header': 'Owner', + 'style': 'white', + 'overflow': 'ellipsis', + 'max_width': 20, + 'accessor': 'user.name' + }, + 'project': { + 'header': 'Project', + 'style': 'cyan', + 'overflow': 'ellipsis', + 'max_width': 20, + 'accessor': 'project.name' + }, + 'created_at': { + 'header': 'Created At', + 'style': 'white', + 'overflow': 'ellipsis', + 'max_width': 20, + 'accessor': 'createdAt' + }, + 
'runtime': { + 'header': 'Total Running Time', + 'style': 'white', + 'no_wrap': True, + 'max_width': 18, + 'accessor': 'totalRunningTimeInSeconds' + }, + 'saved_at': { + 'header': 'Last Time Saved', + 'style': 'white', + 'overflow': 'ellipsis', + 'max_width': 20, + 'accessor': 'lastSavedAt' + }, + 'resources': { + 'header': 'Resources', + 'style': 'cyan', + 'overflow': 'ellipsis', + 'max_width': 30, + 'accessor': 'resources.instanceType' + }, + 'backend': { + 'header': 'Backend', + 'style': 'magenta', + 'overflow': 'fold', + 'max_width': 15, + 'accessor': 'interactiveSessionType' + }, + 'version': { + 'header': 'Version', + 'style': 'white', + 'no_wrap': True, + 'max_width': 15, + 'accessor': 'rVersion' + }, + 'spot': { + 'header': 'Spot', + 'style': 'cyan', + 'no_wrap': True, + 'max_width': 6, + 'accessor': 'resources.isCostSaving' + }, + 'cost_limit': { + 'header': 'Cost Limit Left', + 'style': 'yellow', + 'no_wrap': True, + 'max_width': 15, + 'accessor': 'execution' + }, + 'time_left': { + 'header': 'Time Until Shutdown', + 'style': 'magenta', + 'no_wrap': True, + 'max_width': 20, + 'accessor': 'execution.autoShutdownAtDate' + } + } + + # Determine columns to display + if selected_columns: + if isinstance(selected_columns, str): + selected_columns = [col.strip() for col in selected_columns.split(',')] + columns_to_show = selected_columns + else: + # Responsive column selection based on terminal width + terminal_width = console.width + if terminal_width < 60: + columns_to_show = ['status', 'name', 'id'] + elif terminal_width < 90: + columns_to_show = ['status', 'name', 'type', 'id', 'owner'] + elif terminal_width < 130: + columns_to_show = ['status', 'name', 'type', 'instance', 'cost', 'id', 'owner'] + else: + columns_to_show = ['id', 'name', 'status', 'type', 'instance', 'cost', 'owner'] + + # Handle empty results + if len(sessions) == 0: + console.print('[yellow]No interactive sessions found.[/yellow]') + return + + # Prepare rows data + rows = [] + for session 
in sessions: + row_data = [] + for col_name in columns_to_show: + if col_name not in all_columns: + continue + col_config = all_columns[col_name] + accessor = col_config['accessor'] + + # Extract value from session object + value = _get_nested_value(session, accessor) + + # Format the value + formatted_value = _format_session_field(col_name, value) + row_data.append(formatted_value) + + rows.append(row_data) + + # Interactive pagination - use API pagination metadata if available + if pagination_metadata: + # Server-side pagination + current_api_page = pagination_metadata.get('page', 1) + total_sessions = pagination_metadata.get('count', len(sessions)) + total_pages = pagination_metadata.get('totalPages', 1) + else: + # Client-side pagination (fallback) + current_api_page = 0 + total_sessions = len(sessions) + total_pages = (len(sessions) + page_size - 1) // page_size if len(sessions) > 0 else 1 + + show_error = None # Track error messages to display + + while True: + # For client-side pagination, start/end are indices into the local rows array + # For server-side pagination, we use the API page directly + if fetch_page_callback and pagination_metadata: + # Server-side pagination - sessions list contains current page data + page_rows = rows[:] # All rows are from current page + else: + # Client-side pagination + start = current_api_page * page_size + end = start + page_size + page_rows = rows[start:end] + + # Clear console first + console.clear() + + # Create table + table = Table(title='Interactive Sessions') + + # Add columns to table + for col_name in columns_to_show: + if col_name not in all_columns: + continue + col_config = all_columns[col_name] + table.add_column( + col_config['header'], + style=col_config.get('style', 'white'), + no_wrap=col_config.get('no_wrap', False) + ) + + # Add rows to table + for row in page_rows: + table.add_row(*row) + + # Print table + console.print(table) + + # Display pagination info + console.print(f"\n[cyan]Total 
sessions:[/cyan] {total_sessions}") + if total_pages > 1: + console.print(f"[cyan]Page:[/cyan] {current_api_page} of {total_pages}") + console.print(f"[cyan]Sessions on this page:[/cyan] {len(page_rows)}") + + # Show error message if any + if show_error: + console.print(show_error) + show_error = None # Reset error after displaying + + # Show pagination controls + if total_pages > 1: + # Check if we're in an interactive environment + if not sys.stdin.isatty(): + console.print("\n[yellow]Note: Pagination not available in non-interactive mode. Showing page 1 of {0}.[/yellow]".format(total_pages)) + console.print("[yellow]Run in an interactive terminal to navigate through all pages.[/yellow]") + break + + console.print(f"\n[bold cyan]n[/] = next, [bold cyan]p[/] = prev, [bold cyan]q[/] = quit") + + # Get user input for navigation + try: + choice = input(">>> ").strip().lower() + except (EOFError, KeyboardInterrupt): + # Handle non-interactive environments or user interrupt + console.print("\n[yellow]Pagination interrupted.[/yellow]") + break + + if choice in ("q", "quit"): + break + elif choice in ("n", "next"): + if current_api_page < total_pages: + # Try to fetch the next page + if fetch_page_callback: + try: + next_page_data = fetch_page_callback(current_api_page + 1) + sessions = next_page_data.get('sessions', []) + pagination_metadata = next_page_data.get('pagination_metadata', {}) + current_api_page = pagination_metadata.get('page', current_api_page + 1) + total_pages = pagination_metadata.get('totalPages', total_pages) + + # Rebuild rows for the new page + rows = [] + for session in sessions: + row_data = [] + for col_name in columns_to_show: + if col_name not in all_columns: + continue + col_config = all_columns[col_name] + accessor = col_config['accessor'] + value = _get_nested_value(session, accessor) + formatted_value = _format_session_field(col_name, value) + row_data.append(formatted_value) + rows.append(row_data) + except Exception as e: + show_error = 
def process_interactive_session_list(sessions, all_fields=False):
    """Convert a list of API session objects into a pandas DataFrame.

    Parameters
    ----------
    sessions : list
        Session objects as returned by the API.
    all_fields : bool, default=False
        When True, flatten every field of each session into columns;
        when False, keep only a curated subset of columns.

    Returns
    -------
    pandas.DataFrame
        The tabulated session data.
    """
    if all_fields:
        # Flatten the raw API objects verbatim (nested keys become dotted columns).
        return pd.json_normalize(sessions)

    def _full_user_name(session):
        # The API exposes 'name'/'surname' on the user object,
        # not 'firstName'/'lastName'.
        user = session.get('user', {})
        if not user:
            return ''
        return f"{user.get('name', '')} {user.get('surname', '')}".strip()

    curated = [
        {
            '_id': s.get('_id', ''),
            'name': s.get('name', ''),
            'status': s.get('status', ''),
            'interactiveSessionType': _map_session_type_to_friendly_name(
                s.get('interactiveSessionType', '')),
            'user': _full_user_name(s),
            'instanceType': s.get('resources', {}).get('instanceType', ''),
            'totalCostInUsd': s.get('totalCostInUsd', 0),
        }
        for s in sessions
    ]
    return pd.DataFrame(curated)


def _get_nested_value(obj, path):
    """Follow a dot-separated path through nested dicts.

    Parameters
    ----------
    obj : dict
        The object to extract from.
    path : str
        Dot-separated path (e.g., 'user.firstName').

    Returns
    -------
    value
        The value at the path, or the empty string if any step is missing
        or the resolved value is None.
    """
    current = obj
    for key in path.split('.'):
        if not isinstance(current, dict):
            return ''
        current = current.get(key)
    return '' if current is None else current
def _format_session_field(field_name, value):
    """Format a session field for terminal display.

    Values are rendered with Rich console markup (e.g. ``[bold green]…[/bold
    green]``), so the returned strings are intended for a Rich ``Console`` /
    ``Table``, not for plain output.

    Parameters
    ----------
    field_name : str
        The name of the field (e.g. 'status', 'cost', 'runtime').
    value
        The raw value to format; type varies per field.

    Returns
    -------
    str
        The formatted value; '-' when the value is empty/unknown.
    """
    # Empty string or None renders as a dash placeholder.
    if value == '' or value is None:
        return '-'

    if field_name == 'status':
        # Color code status and map display values
        status_lower = str(value).lower()
        # Map API statuses to display values
        # API 'ready' and 'aborted' are mapped to user-friendly names
        display_status = 'running' if status_lower == 'ready' else ('stopped' if status_lower == 'aborted' else value)

        if status_lower in ['ready', 'running']:
            return f'[bold green]{display_status}[/bold green]'
        elif status_lower in ['stopped', 'aborted']:
            return f'[bold red]{display_status}[/bold red]'
        elif status_lower in ['setup', 'initialising', 'initializing', 'scheduled']:
            return f'[bold yellow]{display_status}[/bold yellow]'
        else:
            return str(display_status)

    elif field_name == 'cost':
        # Format cost with currency symbol; non-numeric values pass through.
        try:
            cost = float(value)
            return f'${cost:.2f}'
        except (ValueError, TypeError):
            return str(value)

    elif field_name == 'id':
        # Return full ID without truncation (MongoDB ObjectIds are always 24 chars)
        # Full ID is needed for status command and other operations
        return str(value)

    elif field_name == 'name':
        # Truncate long names to 25 display chars (22 + ellipsis).
        value_str = str(value)
        if len(value_str) > 25:
            return value_str[:22] + '…'
        return value_str

    elif field_name == 'runtime':
        # Convert seconds to human-readable format (e.g., "1h 52m 52s")
        try:
            total_seconds = int(float(value))
            hours = total_seconds // 3600
            minutes = (total_seconds % 3600) // 60
            seconds = total_seconds % 60
            if hours > 0:
                return f'{hours}h {minutes}m {seconds}s'
            elif minutes > 0:
                return f'{minutes}m {seconds}s'
            else:
                return f'{seconds}s'
        except (ValueError, TypeError):
            return str(value)

    elif field_name == 'created_at' or field_name == 'saved_at':
        # Format ISO8601 datetime to readable format; on parse failure fall
        # back to the first 19 chars of the raw value (the date-time portion).
        try:
            dt = datetime.fromisoformat(str(value).replace('Z', '+00:00'))
            return dt.strftime('%Y-%m-%d %H:%M')
        except (ValueError, TypeError, ImportError):
            return str(value)[:19] if value else '-'

    elif field_name == 'version':
        # Version is only available for RStudio sessions
        if value and str(value).lower() != 'none':
            return f'R {value}'
        return '-'

    elif field_name == 'type':
        # Map internal type names to user-friendly names
        return _map_session_type_to_friendly_name(str(value))

    elif field_name == 'spot':
        # Indicate if instance is cost-saving (spot); only literal booleans
        # are mapped, anything else renders as '-'.
        if value is True:
            return '[bold cyan]Yes[/bold cyan]'
        elif value is False:
            return 'No'
        else:
            return '-'

    elif field_name == 'cost_limit':
        # Calculate remaining cost limit (execution object contains computeCostLimit and computeCostSpent)
        if isinstance(value, dict):
            cost_limit = value.get('computeCostLimit', -1)
            cost_spent = value.get('computeCostSpent', 0)

            # -1 means unlimited
            if cost_limit == -1:
                return 'Unlimited'

            try:
                remaining = float(cost_limit) - float(cost_spent)
                if remaining < 0:
                    remaining = 0
                return f'${remaining:.2f}'
            except (ValueError, TypeError):
                return '-'
        return '-'

    elif field_name == 'time_left':
        # Calculate time until auto-shutdown.
        # NOTE(review): the 'null' string guard presumably handles a
        # JSON-null serialised as text by the API — confirm against callers.
        if value and value != 'null' and str(value).strip():
            try:
                shutdown_time = datetime.fromisoformat(str(value).replace('Z', '+00:00'))
                now = datetime.now(timezone.utc)

                if shutdown_time > now:
                    time_diff = shutdown_time - now
                    total_seconds = int(time_diff.total_seconds())
                    hours = total_seconds // 3600
                    minutes = (total_seconds % 3600) // 60

                    if hours > 24:
                        days = hours // 24
                        remaining_hours = hours % 24
                        return f'{days}d {remaining_hours}h'
                    elif hours > 0:
                        return f'{hours}h {minutes}m'
                    else:
                        return f'{minutes}m'
                else:
                    return '[red]Expired[/red]'
            except (ValueError, TypeError, ImportError):
                return '-'
        return '-'

    # Unrecognised fields: stringify as-is.
    return str(value)
def save_interactive_session_list_to_csv(df, outfile, count=None):
    """Write the interactive session table to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        The session data to save.
    outfile : str
        Path to the output CSV file.
    count : int, optional
        Number of sessions on this page, echoed in the status message.
    """
    df.to_csv(outfile, index=False)
    if count is not None:
        print(f'\tInteractive session list collected with a total of {count} sessions on this page.')
    print(f'\tInteractive session list saved to {outfile}')


def parse_shutdown_duration(duration_str):
    """Convert a duration like '2h' into a future ISO8601 UTC timestamp.

    Accepted formats: <N>m, <N>h, <N>d (e.g. 30m, 2h, 8h, 1d, 2d);
    the unit letter is case-insensitive.

    Parameters
    ----------
    duration_str : str
        Duration string (e.g., "2h", "30m", "1d").

    Returns
    -------
    str
        ISO8601 formatted datetime string (future time, 'Z'-suffixed).

    Raises
    ------
    ValueError
        If the string does not match the <number><unit> pattern.
    """
    parsed = re.match(r'^(\d+)([mhd])$', duration_str.lower())
    if parsed is None:
        raise ValueError(f"Invalid duration format: {duration_str}. Use format like '2h', '30m', '1d'")

    amount = int(parsed.group(1))
    # Map the unit letter to the matching timedelta keyword.
    unit_kwargs = {'m': 'minutes', 'h': 'hours', 'd': 'days'}
    offset = timedelta(**{unit_kwargs[parsed.group(2)]: amount})

    deadline = datetime.now(timezone.utc) + offset
    return deadline.isoformat().replace('+00:00', 'Z')


def parse_data_file(data_file_str):
    """Parse a data-file reference: S3 URI or CloudOS dataset path.

    Supports mounting both S3 files and CloudOS dataset files into the session.

    Parameters
    ----------
    data_file_str : str
        Format:
        - S3 file: s3://bucket_name/path/to/file.txt
        - CloudOS dataset: project_name/dataset_path or project_name > dataset_path

        Examples:
        - s3://lifebit-featured-datasets/pipelines/phewas/data.csv
        - leila-test/Data/3_vcf_list.txt

    Returns
    -------
    dict
        For S3: {"type": "s3", "s3_bucket": "...", "s3_prefix": "..."}
        For CloudOS: {"type": "cloudos", "project_name": "...", "dataset_path": "..."}

    Raises
    ------
    ValueError
        If the string matches none of the supported formats.
    """
    if data_file_str.startswith('s3://'):
        # s3://bucket/prefix/file -> bucket + prefix
        pieces = data_file_str[5:].split('/', 1)
        bucket = pieces[0]
        if not bucket:
            raise ValueError(f"Invalid S3 path: {data_file_str}. Expected: s3://bucket_name/path/to/file")
        return {
            "type": "s3",
            "s3_bucket": bucket,
            "s3_prefix": pieces[1] if len(pieces) > 1 else "/"
        }

    # CloudOS dataset path; '>' takes precedence over '/' as the separator.
    if '>' in data_file_str:
        separator = '>'
    elif '/' in data_file_str:
        separator = '/'
    else:
        raise ValueError(
            f"Invalid data file format: {data_file_str}. Expected one of:\n"
            f"  - S3 file: s3://bucket/path/file.txt\n"
            f"  - CloudOS dataset: project_name/dataset_path or project_name > dataset_path"
        )

    # Split only on the first separator so nested dataset paths survive.
    parts = data_file_str.split(separator, 1)
    if len(parts) != 2:
        raise ValueError(f"Invalid data file format: {data_file_str}. Expected: project_name/dataset_path where dataset_path can be nested")

    project_name, dataset_path = parts
    return {
        "type": "cloudos",
        "project_name": project_name.strip(),
        "dataset_path": dataset_path.strip()
    }
def resolve_data_file_id(datasets_api, dataset_path: str) -> dict:
    """Resolve a nested dataset path to a concrete file ID.

    Searches across all datasets in the project to find the target file, so a
    path such as 'Data/file.txt' works even when 'Data' is a folder inside a
    dataset rather than a dataset name itself.

    Parameters
    ----------
    datasets_api : Datasets
        Initialized Datasets API instance (with correct project_name).
    dataset_path : str
        Nested path to file within the project (e.g., 'Data/file.txt' or
        'Folder/subfolder/file.txt'). The first component may be a dataset
        name or a folder name within any dataset.

    Returns
    -------
    dict
        Data item object with resolved file ID:
        {"kind": "File", "item": "<file id>", "name": "<file name>"}

    Raises
    ------
    ValueError
        If the file is not found in any dataset/folder, or if an unexpected
        API error occurs while resolving.
    """
    def _match(listing):
        # Return the data-item dict for the target file in `listing`, or None.
        for file_item in listing.get('files', []):
            if file_item.get('name') == file_name:
                return {
                    "kind": "File",
                    "item": file_item.get('_id'),
                    "name": file_item.get('name')
                }
        return None

    try:
        path_parts = dataset_path.strip('/').split('/')
        file_name = path_parts[-1]

        # Quick path: assume the first path component is a dataset name.
        try:
            found = _match(datasets_api.list_folder_content(dataset_path))
            if found:
                return found
        except Exception:
            # Quick path failed; fall through to searching all datasets.
            pass

        # The first component may be a folder rather than a dataset name:
        # search every dataset in the project.
        project_content = datasets_api.list_project_content()
        datasets = project_content.get('folders', [])

        if not datasets:
            raise ValueError(f"No datasets found in project. Cannot locate path '{dataset_path}'")

        # Pass 1: try each dataset name prepended to the requested path.
        # (Previously a bare `return found_files[0]` ran even when no match
        # was appended, relying on a swallowed IndexError to continue the
        # loop; return only on an actual match instead.)
        for dataset in datasets:
            dataset_name = dataset.get('name')
            try:
                listing = datasets_api.list_folder_content(f"{dataset_name}/{dataset_path}")
            except Exception:
                # This dataset doesn't contain the path, continue.
                continue
            found = _match(listing)
            if found:
                return found

        # Pass 2: inspect each dataset's top level directly.
        found_files = []
        for dataset in datasets:
            try:
                dataset_name = dataset.get('name')
                dataset_content = datasets_api.list_datasets_content(dataset_name)

                # The target file may sit directly at the dataset root.
                direct = _match(dataset_content)
                if direct:
                    found_files.append(direct)

                # Or the dataset may contain the first path component as a folder.
                for folder in dataset_content.get('folders', []):
                    if folder.get('name') == path_parts[0]:
                        try:
                            found = _match(datasets_api.list_folder_content(f"{dataset_name}/{dataset_path}"))
                            if found:
                                return found
                        except Exception:
                            continue
            except Exception:
                continue

        # Fall back to any root-level match collected above.
        if found_files:
            return found_files[0]

        # Nothing found - provide a helpful error message.
        available_datasets = [d.get('name') for d in datasets]
        raise ValueError(
            f"File at path '{dataset_path}' not found in any dataset. "
            f"Available datasets: {available_datasets}. "
            f"Try using 'cloudos datasets ls' to explore your data structure."
        )
    except ValueError:
        raise
    except Exception as e:
        raise ValueError(f"Error resolving dataset file at path '{dataset_path}': {str(e)}")
def parse_link_path(link_path_str):
    """Parse link path format: supports S3, CloudOS, or legacy colon format.

    Links an S3 folder or CloudOS folder to the session for read/write access.

    Parameters
    ----------
    link_path_str : str
        Format (one of):
        - S3 path: s3://bucketName/s3Prefix (e.g., s3://my-bucket/data/)
        - CloudOS folder: project/folder_path (e.g., leila-test/Data)
        - Legacy format (deprecated): mountName:bucketName:s3Prefix

    Returns
    -------
    dict
        For S3: {"type": "s3", "s3_bucket": "...", "s3_prefix": "..."}
        (the legacy format additionally carries "mount_name")
        For CloudOS: {"type": "cloudos", "project_name": "...", "folder_path": "..."}

    Raises
    ------
    ValueError
        For Azure blob paths (linking unsupported) or any unrecognised format.
    """
    # Azure blob storage is not supported for linking; fail fast with a clear
    # message. Parentheses make the and/or precedence explicit.
    if link_path_str.startswith('az://') or (
            link_path_str.startswith('https://') and '.blob.core.windows.net' in link_path_str):
        raise ValueError(
            f"Azure blob storage paths are not supported for linking. "
            f"Folder linking is not supported on Azure execution platforms. "
            f"Please use CloudOS file explorer to access your data directly."
        )

    # Check for S3 path
    if link_path_str.startswith('s3://'):
        # Parse S3 path: s3://bucket/prefix
        s3_path = link_path_str[5:]  # Remove 's3://'
        parts = s3_path.split('/', 1)

        bucket = parts[0]
        # str.split always yields at least one element, so the old
        # `len(parts) < 1` guard was dead code: validate the bucket itself.
        if not bucket:
            raise ValueError(f"Invalid S3 path: {link_path_str}. Expected: s3://bucket_name/prefix/")

        prefix = parts[1] if len(parts) > 1 else ""

        # Ensure prefix ends with / for S3 folders
        if prefix and not prefix.endswith('/'):
            prefix = prefix + '/'

        return {
            "type": "s3",
            "s3_bucket": bucket,
            "s3_prefix": prefix
        }

    # Check for legacy colon format: mountName:bucketName:s3Prefix
    if ':' in link_path_str and '//' not in link_path_str:
        parts = link_path_str.split(':')
        if len(parts) != 3:
            raise ValueError(f"Invalid link format: {link_path_str}. Expected: mountName:bucketName:s3Prefix")

        mount_name, bucket, prefix = parts

        # Ensure prefix ends with /
        if prefix and not prefix.endswith('/'):
            prefix = prefix + '/'

        return {
            "type": "s3",
            "mount_name": mount_name,
            "s3_bucket": bucket,
            "s3_prefix": prefix
        }

    # Otherwise, parse as CloudOS folder path:
    # project_name/folder_path or project_name > folder_path ('>' wins).
    separator = None
    if '>' in link_path_str:
        separator = '>'
    elif '/' in link_path_str:
        separator = '/'
    else:
        raise ValueError(
            f"Invalid link path format: {link_path_str}. Expected one of:\n"
            f"  - S3 path: s3://bucket/prefix/\n"
            f"  - CloudOS folder: project/folder/path\n"
            f"  - Legacy format (deprecated): mountName:bucketName:prefix"
        )

    parts = link_path_str.split(separator, 1)
    if len(parts) != 2:
        raise ValueError(f"Invalid link path: {link_path_str}")

    project_name, folder_path = parts
    return {
        "type": "cloudos",
        "project_name": project_name.strip(),
        "folder_path": folder_path.strip()
    }
def build_session_payload(
    name,
    backend,
    project_id,
    execution_platform='aws',
    instance_type='c5.xlarge',
    storage_size=500,
    is_spot=False,
    is_shared=False,
    cost_limit=-1,
    shutdown_at=None,
    data_files=None,
    s3_mounts=None,
    r_version=None,
    spark_master_type=None,
    spark_core_type=None,
    spark_workers=1
):
    """Build the complex session creation payload for the API.

    Parameters
    ----------
    name : str
        Session name (1-100 characters).
    backend : str
        Backend type: regular, vscode, spark, rstudio.
    project_id : str
        Project MongoDB ObjectId.
    execution_platform : str, optional
        Execution platform: 'aws' (default) or 'azure'.
    instance_type : str
        Instance type (EC2 for AWS, e.g., c5.xlarge; Azure VM size, e.g., Standard_F1s).
    storage_size : int
        Storage in GB (default: 500, range: 100-5000).
    is_spot : bool
        Use spot instances (AWS only, default: False).
    is_shared : bool
        Make session shared workspace-wide (default: False).
    cost_limit : float
        Compute cost limit in USD (default: -1 for unlimited).
    shutdown_at : str, optional
        ISO8601 datetime for auto-shutdown; defaults to 24 hours from now.
    data_files : list, optional
        Data file dicts. For AWS: CloudOS or S3. For Azure: CloudOS only.
    s3_mounts : list, optional
        S3 mount dicts (AWS only; silently dropped on Azure).
    r_version : str, optional
        R version for RStudio (required for rstudio backend).
    spark_master_type : str, optional
        Spark master instance type (required for spark backend, AWS only).
    spark_core_type : str, optional
        Spark core instance type (required for spark backend, AWS only).
    spark_workers : int
        Initial number of Spark workers (default: 1, AWS only).

    Returns
    -------
    dict
        Complete payload for the API request.

    Raises
    ------
    ValueError
        If any argument or argument combination is invalid.
    """
    # --- Validate inputs --------------------------------------------------
    if not 1 <= len(name) <= 100:
        raise ValueError("Session name must be 1-100 characters")

    if not 100 <= storage_size <= 5000:
        raise ValueError("Storage size must be between 100-5000 GB")

    if backend not in ['regular', 'vscode', 'spark', 'rstudio']:
        raise ValueError("Invalid backend type")

    if execution_platform not in ['aws', 'azure']:
        raise ValueError("Execution platform must be 'aws' or 'azure'")

    # Spark clusters are only provisioned on AWS (EMR).
    if backend == 'spark' and execution_platform != 'aws':
        raise ValueError("Spark backend is only available on AWS")

    if backend == 'rstudio' and not r_version:
        raise ValueError("R version (--r-version) is required for RStudio backend")

    if backend == 'spark' and (not spark_master_type or not spark_core_type):
        raise ValueError("Spark master and core instance types are required for Spark backend")

    # Default shutdown to 24 hours from now, as a 'Z'-suffixed ISO8601 string.
    if not shutdown_at:
        shutdown_at = (datetime.now(timezone.utc) + timedelta(hours=24)).isoformat().replace('+00:00', 'Z')

    # --- Build interactiveSessionConfiguration ----------------------------
    config = {
        "name": name,
        "backend": backend,
        "executionPlatform": execution_platform,
        "instanceType": instance_type,
        "isCostSaving": is_spot,
        "storageSizeInGb": storage_size,
        "storageMode": "regular",
        "visibility": "workspace" if is_shared else "private",
        "execution": {
            "computeCostLimit": cost_limit,
            "autoShutdownAtDate": shutdown_at
        }
    }

    # Backend-specific fields.
    if backend == 'rstudio':
        config['rVersion'] = r_version

    if backend == 'spark':
        # Autoscaling ceiling: twice the initial workers, but never below 10.
        max_workers = max(spark_workers * 2, 10)

        config['cluster'] = {
            "name": f"{name}-cluster",
            "releaseLabel": "emr-7.3.0",
            "ebsRootVolumeSizeInGb": 100,
            "instances": {
                "master": {
                    "type": spark_master_type,
                    "costSaving": is_spot,
                    "storage": {
                        "type": "gp2",
                        "sizeInGbs": 50,
                        "volumesPerInstance": 1
                    }
                },
                "core": {
                    "type": spark_core_type,
                    "costSaving": is_spot,
                    "storage": {
                        "type": "gp2",
                        "sizeInGbs": 50,
                        "volumesPerInstance": 1
                    },
                    "minNumberOfInstances": spark_workers,
                    "autoscaling": {
                        "minCapacity": spark_workers,
                        "maxCapacity": max_workers
                    }
                },
                "tasks": []
            },
            "autoscaling": {
                "minCapacity": spark_workers,
                "maxCapacity": max_workers
            },
            "id": None
        }

    # --- Build complete payload -------------------------------------------
    payload = {
        "interactiveSessionConfiguration": config,
        "dataItems": data_files or [],
        "fileSystemIds": [],  # Always empty (legacy compatibility)
        # Parenthesised to make the intent explicit: S3 mounts only on AWS
        # (Azure does not support fuse file systems).
        "fuseFileSystems": (s3_mounts or []) if execution_platform == 'aws' else [],
        "projectId": project_id
    }

    return payload
def format_session_creation_table(session_data, instance_type=None, storage_size=None,
                                  backend_type=None, r_version=None,
                                  spark_master=None, spark_core=None, spark_workers=None,
                                  data_files=None, s3_mounts=None):
    """Print a Rich table summarising a newly created session.

    Parameters
    ----------
    session_data : dict
        Session data from API response.
    instance_type : str, optional
        Instance type that was requested (displayed if absent from response).
    storage_size : int, optional
        Storage size that was requested (displayed if absent from response).
    backend_type : str, optional
        Backend type (regular, vscode, spark, rstudio) for backend-specific display.
    r_version : str, optional
        R version for RStudio backend.
    spark_master : str, optional
        Spark master instance type.
    spark_core : str, optional
        Spark core instance type.
    spark_workers : int, optional
        Number of Spark workers.
    data_files : list, optional
        List of parsed data file objects to display.
    s3_mounts : list, optional
        List of parsed S3 mount objects to display.

    Returns
    -------
    None
        Output is printed directly to the terminal via a Rich ``Console``.
    """
    console = Console()

    table = Table(title="✓ Interactive Session Created Successfully")
    table.add_column("Property", style="cyan")
    table.add_column("Value", style="green")

    table.add_row("Session ID", session_data.get('_id', 'N/A'))
    table.add_row("Name", session_data.get('name', 'N/A'))
    table.add_row("Backend", session_data.get('interactiveSessionType', 'N/A'))
    table.add_row("Status", session_data.get('status', 'N/A'))

    # Try to get instance type from response, fallback to provided value
    # (the response shape differs between API versions: 'resources' vs
    # 'interactiveSessionConfiguration').
    response_instance = session_data.get('resources', {}).get('instanceType') or \
        session_data.get('interactiveSessionConfiguration', {}).get('instanceType')
    instance_display = response_instance or instance_type or 'N/A'
    table.add_row("Instance Type", instance_display)

    # Try to get storage size from response, fallback to provided value
    response_storage = session_data.get('resources', {}).get('storageSizeInGb') or \
        session_data.get('interactiveSessionConfiguration', {}).get('storageSizeInGb')
    storage_display = f"{response_storage} GB" if response_storage else (f"{storage_size} GB" if storage_size else "N/A")
    table.add_row("Storage", storage_display)

    # Add backend-specific information
    if backend_type == 'rstudio' and r_version:
        table.add_row("R Version", r_version)

    if backend_type == 'spark':
        spark_config = []
        if spark_master:
            spark_config.append(f"Master: {spark_master}")
        if spark_core:
            spark_config.append(f"Core: {spark_core}")
        if spark_workers:
            spark_config.append(f"Workers: {spark_workers}")

        if spark_config:
            table.add_row("Spark Cluster", ", ".join(spark_config))

    # Display mounted data files
    if data_files:
        mounted_files = []
        for df in data_files:
            if isinstance(df, dict):
                # Handle CloudOS dataset files
                if df.get('kind') == 'File':
                    name = df.get('name', 'Unknown')
                    mounted_files.append(name)
                # Handle S3 files
                elif df.get('type') == 'S3File':
                    data = df.get('data', {})
                    name = data.get('name', 'Unknown')
                    mounted_files.append(f"{name} (S3)")

        if mounted_files:
            table.add_row("Mounted Data", ", ".join(mounted_files))

    # Display linked S3 buckets
    if s3_mounts:
        linked_s3 = []
        for s3 in s3_mounts:
            if isinstance(s3, dict):
                data = s3.get('data', {})
                bucket = data.get('s3BucketName', '')
                prefix = data.get('s3Prefix', '')
                # For CloudOS mounts, show project/path; for S3, show bucket/path
                if prefix and bucket:
                    linked_s3.append(f"s3://{bucket}/{prefix}")
                elif bucket:
                    linked_s3.append(f"s3://{bucket}/")

        if linked_s3:
            table.add_row("Linked S3", "\n".join(linked_s3))

    console.print(table)
    console.print("\n[yellow]Note:[/yellow] Session provisioning typically takes 3-10 minutes.")
    console.print("[cyan]Next steps:[/cyan] Use 'cloudos interactive-session list' to monitor status")
class TestInteractiveSessionCreateCommand:
    """Test the interactive session create command structure."""

    def test_interactive_session_create_command_exists(self):
        """Test that the 'interactive-session create' command exists."""
        runner = CliRunner()
        result = runner.invoke(run_cloudos_cli, ['interactive-session', 'create', '--help'])

        # Command should exist and not error out
        assert result.exit_code == 0
        assert 'create' in result.output.lower()

    def test_interactive_session_create_has_required_options(self):
        """Test that required options are present in create command."""
        runner = CliRunner()
        result = runner.invoke(run_cloudos_cli, ['interactive-session', 'create', '--help'])

        assert result.exit_code == 0
        # Check for required options. A single substring check suffices:
        # the previous `x in out or x in out` duplicated the same operand.
        assert '--apikey' in result.output
        assert '--workspace-id' in result.output
        assert '--project-name' in result.output
        assert '--name' in result.output
        assert '--session-type' in result.output

    def test_interactive_session_create_session_type_choices(self):
        """Test that session type options are correct."""
        runner = CliRunner()
        result = runner.invoke(run_cloudos_cli, ['interactive-session', 'create', '--help'])

        assert result.exit_code == 0
        # Check for session type choices. The literals are lowercase, so the
        # case-insensitive check subsumes the former exact-match alternative.
        assert 'jupyter' in result.output.lower()
        assert 'vscode' in result.output.lower()

    def test_interactive_session_create_has_optional_configuration_options(self):
        """Test that optional configuration options are present."""
        runner = CliRunner()
        result = runner.invoke(run_cloudos_cli, ['interactive-session', 'create', '--help'])

        assert result.exit_code == 0
        # Check for optional options
        assert '--instance' in result.output
        assert '--storage' in result.output
        assert '--spot' in result.output
        assert '--shared' in result.output
        assert '--cost-limit' in result.output
        assert '--shutdown-in' in result.output
        assert '--mount' in result.output
        assert '--link' in result.output
        assert '--r-version' in result.output
        assert '--spark-master' in result.output
        assert '--spark-core' in result.output
        assert '--spark-workers' in result.output


class TestInteractiveSessionCreateIntegration:
    """Integration tests for interactive session create command with mocked API."""

    @pytest.fixture
    def runner(self):
        """Provide a CliRunner instance."""
        return CliRunner()

    def test_create_session_missing_required_options(self, runner):
        """Test creating session without required options fails."""
        result = runner.invoke(run_cloudos_cli, [
            'interactive-session', 'create',
            '--apikey', 'test_key'
        ])

        # Should fail for missing required options
        assert result.exit_code != 0
    # NOTE: @patch decorators are applied bottom-up, so the innermost patch
    # (ConfigurationProfile) binds to the first mock argument (mock_config).
    @patch('cloudos_cli.interactive_session.cli.Cloudos')
    @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data')
    def test_create_session_jupyter_basic(self, mock_config, mock_cloudos):
        """Test creating a basic Jupyter session."""
        runner = CliRunner()

        # Mock the configuration loading
        mock_config.return_value = {
            'apikey': 'test_key',
            'cloudos_url': 'http://test.com',
            'workspace_id': 'test_team',
            'project_name': 'my_project'
        }

        # Mock the Cloudos API calls
        mock_cloudos_instance = MagicMock()
        mock_cloudos.return_value = mock_cloudos_instance
        mock_cloudos_instance.create_interactive_session.return_value = {
            '_id': 'session_001',
            'name': 'Test Jupyter',
            'status': 'running',
            'interactiveSessionType': 'awsJupyterNotebook'
        }

        result = runner.invoke(run_cloudos_cli, [
            'interactive-session', 'create',
            '--apikey', 'test_key',
            '--cloudos-url', 'http://test.com',
            '--workspace-id', 'test_team',
            '--project-name', 'my_project',
            '--name', 'Test Jupyter',
            '--session-type', 'jupyter'
        ])

        # Command should execute (may fail at config loading but not at argument parsing)
        assert 'Error' not in result.output or result.exit_code == 0

    @patch('cloudos_cli.interactive_session.cli.resolve_data_file_id')
    @patch('cloudos_cli.interactive_session.cli.Datasets')
    @patch('cloudos_cli.interactive_session.cli.Cloudos')
    @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data')
    def test_create_session_with_all_options(self, mock_config, mock_cloudos, mock_datasets, mock_resolve):
        """Test creating a session with all options specified."""
        runner = CliRunner()

        mock_config.return_value = {
            'apikey': 'test_key',
            'cloudos_url': 'http://test.com',
            'workspace_id': 'test_team',
            'project_name': 'my_project'
        }

        # Mock Datasets API for resolving mounted files
        mock_resolve.return_value = {
            'type': 'CloudOSFile',
            'item': 'file_id_123'
        }

        mock_cloudos_instance = MagicMock()
        mock_cloudos.return_value = mock_cloudos_instance
        mock_cloudos_instance.create_interactive_session.return_value = {
            '_id': 'session_002',
            'name': 'Advanced Session',
            'status': 'provisioning'
        }

        result = runner.invoke(run_cloudos_cli, [
            'interactive-session', 'create',
            '--apikey', 'test_key',
            '--cloudos-url', 'http://test.com',
            '--workspace-id', 'test_team',
            '--project-name', 'my_project',
            '--name', 'Advanced Session',
            '--session-type', 'vscode',
            '--instance', 'c5.2xlarge',
            '--storage', '1000',
            '--spot',
            '--shared',
            '--cost-limit', '50.0',
            '--shutdown-in', '8h',
            '--mount', 'MyDataset/datafile.csv'
        ])

        # Command should be invoked without syntax errors
        assert result.exit_code == 0

    @patch('cloudos_cli.interactive_session.cli.Cloudos')
    @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data')
    def test_create_session_spark_with_cluster_config(self, mock_config, mock_cloudos):
        """Test creating a Spark session with cluster configuration."""
        runner = CliRunner()

        mock_config.return_value = {
            'apikey': 'test_key',
            'cloudos_url': 'http://test.com',
            'workspace_id': 'test_team',
            'project_name': 'my_project'
        }

        mock_cloudos_instance = MagicMock()
        mock_cloudos.return_value = mock_cloudos_instance
        mock_cloudos_instance.create_interactive_session.return_value = {
            '_id': 'session_003',
            'name': 'Spark Cluster',
            'status': 'scheduled'
        }

        result = runner.invoke(run_cloudos_cli, [
            'interactive-session', 'create',
            '--apikey', 'test_key',
            '--cloudos-url', 'http://test.com',
            '--workspace-id', 'test_team',
            '--project-name', 'my_project',
            '--name', 'Spark Cluster',
            '--session-type', 'spark',
            '--spark-master', 'c5.2xlarge',
            '--spark-core', 'c5.xlarge',
            '--spark-workers', '3'
        ])

        assert result.exit_code == 0

    @patch('cloudos_cli.interactive_session.cli.Cloudos')
@patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_create_session_rstudio_with_r_version(self, mock_config, mock_cloudos): + """Test creating an RStudio session with R version.""" + runner = CliRunner() + + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 'test_team', + 'project_name': 'my_project' + } + + mock_cloudos_instance = MagicMock() + mock_cloudos.return_value = mock_cloudos_instance + mock_cloudos_instance.create_interactive_session.return_value = { + '_id': 'session_004', + 'name': 'RStudio Session', + 'status': 'running' + } + + result = runner.invoke(run_cloudos_cli, [ + 'interactive-session', 'create', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com', + '--workspace-id', 'test_team', + '--project-name', 'my_project', + '--name', 'RStudio Session', + '--session-type', 'rstudio', + '--r-version', '4.5.2' + ]) + + assert result.exit_code == 0 + + @patch('cloudos_cli.interactive_session.cli.Cloudos') + @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_create_session_with_defaults(self, mock_config, mock_cloudos): + """Test creating a session with default values for optional parameters.""" + runner = CliRunner() + + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 'test_team', + 'project_name': 'my_project' + } + + mock_cloudos_instance = MagicMock() + mock_cloudos.return_value = mock_cloudos_instance + + mock_cloudos_instance.create_interactive_session.return_value = { + '_id': 'session_006', + 'name': 'Default Session', + 'status': 'scheduled', + 'backend_type': 'regular', + 'instance_type': 'c5.xlarge', + 'storage': 500 + } + + result = runner.invoke(run_cloudos_cli, [ + 'interactive-session', 'create', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com', + '--workspace-id', 'test_team', + '--project-name', 
'my_project', + '--name', 'Default Session', + '--session-type', 'jupyter' + ]) + + assert result.exit_code == 0 + + +class TestInteractiveSessionAPIMethod: + """Unit tests for the create_interactive_session API method.""" + + def test_create_interactive_session_method_exists(self): + """Test that the create_interactive_session method exists in Cloudos class.""" + from cloudos_cli.clos import Cloudos + + assert hasattr(Cloudos, 'create_interactive_session') + assert callable(getattr(Cloudos, 'create_interactive_session')) + + def test_create_interactive_session_signature(self): + """Test that the method has the correct signature.""" + from cloudos_cli.clos import Cloudos + import inspect + + method = getattr(Cloudos, 'create_interactive_session') + sig = inspect.signature(method) + params = list(sig.parameters.keys()) + + assert 'self' in params + assert 'team_id' in params + assert 'payload' in params + assert 'verify' in params + + @patch('cloudos_cli.clos.retry_requests_post') + def test_create_interactive_session_api_call(self, mock_post): + """Test that the method makes the correct API call.""" + from cloudos_cli.clos import Cloudos + + # Setup mock response + mock_response = MagicMock() + mock_response.status_code = 201 + mock_response.json.return_value = { + '_id': 'session_001', + 'name': 'Test Session', + 'status': 'scheduled' + } + mock_post.return_value = mock_response + + # Create Cloudos instance and call method + cl = Cloudos('http://test.com', 'test_key', None) + payload = { + 'interactiveSessionConfiguration': { + 'backend': 'regular' + }, + 'projectId': 'proj_001' + } + result = cl.create_interactive_session('test_team', payload) + + # Verify API was called + assert mock_post.called + call_args = mock_post.call_args + # Check the endpoint contains the team ID + assert 'interactive-sessions' in call_args[0][0] + # Verify the result + assert result['_id'] == 'session_001' + + @patch('cloudos_cli.clos.retry_requests_post') + def 
test_create_interactive_session_error_handling(self, mock_post): + """Test error handling for failed API calls.""" + from cloudos_cli.clos import Cloudos + from cloudos_cli.utils.errors import BadRequestException + + # Setup mock error response + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.text = 'Bad request message' + mock_post.return_value = mock_response + + # Create Cloudos instance and call method + cl = Cloudos('http://test.com', 'test_key', None) + payload = {'test': 'data'} + + # Should raise BadRequestException for HTTP 400 + with pytest.raises(BadRequestException): + cl.create_interactive_session('test_team', payload) + + +class TestSessionCreatorHelpers: + """Unit tests for session_creator helper functions.""" + + def test_parse_shutdown_duration_function_exists(self): + """Test that parse_shutdown_duration function exists.""" + from cloudos_cli.interactive_session.interactive_session import parse_shutdown_duration + + assert callable(parse_shutdown_duration) + + def test_parse_shutdown_duration_hours(self): + """Test parsing shutdown duration in hours.""" + from cloudos_cli.interactive_session.interactive_session import parse_shutdown_duration + + result = parse_shutdown_duration('2h') + # Should return a datetime string + assert isinstance(result, str) + assert 'T' in result # ISO format + + def test_parse_shutdown_duration_days(self): + """Test parsing shutdown duration in days.""" + from cloudos_cli.interactive_session.interactive_session import parse_shutdown_duration + + result = parse_shutdown_duration('1d') + assert isinstance(result, str) + assert 'T' in result # ISO format + + def test_parse_data_file_function_exists(self): + """Test that parse_data_file function exists.""" + from cloudos_cli.interactive_session.interactive_session import parse_data_file + + assert callable(parse_data_file) + + def test_parse_data_file_format(self): + """Test parsing data file format.""" + from 
cloudos_cli.interactive_session.interactive_session import parse_data_file + + # Test CloudOS dataset with / separator: project_name/dataset_path + result = parse_data_file('leila-test/Data/mydata.csv') + assert isinstance(result, dict) + assert result['type'] == 'cloudos' + assert 'project_name' in result + assert 'dataset_path' in result + assert result['project_name'] == 'leila-test' + assert result['dataset_path'] == 'Data/mydata.csv' + + # Test CloudOS dataset with > separator + result2 = parse_data_file('leila-test > Data/mydata.csv') + assert result2['type'] == 'cloudos' + assert result2['project_name'] == 'leila-test' + assert result2['dataset_path'] == 'Data/mydata.csv' + + # Test CloudOS dataset with nested paths + result3 = parse_data_file('my-project/folder/subfolder/file.txt') + assert result3['type'] == 'cloudos' + assert result3['project_name'] == 'my-project' + assert result3['dataset_path'] == 'folder/subfolder/file.txt' + + # Test S3 file path + result4 = parse_data_file('s3://lifebit-featured-datasets/pipelines/phewas/100_binary_pheno.phe') + assert isinstance(result4, dict) + assert result4['type'] == 's3' + assert 's3_bucket' in result4 + assert 's3_prefix' in result4 + assert result4['s3_bucket'] == 'lifebit-featured-datasets' + assert result4['s3_prefix'] == 'pipelines/phewas/100_binary_pheno.phe' + + # Test S3 bucket root file + result5 = parse_data_file('s3://my-bucket/file.txt') + assert result5['type'] == 's3' + assert result5['s3_bucket'] == 'my-bucket' + assert result5['s3_prefix'] == 'file.txt' + + def test_resolve_data_file_id_function_exists(self): + """Test that resolve_data_file_id function exists.""" + from cloudos_cli.interactive_session.interactive_session import resolve_data_file_id + + assert callable(resolve_data_file_id) + + def test_build_session_payload_function_exists(self): + """Test that build_session_payload function exists.""" + from cloudos_cli.interactive_session.interactive_session import build_session_payload + + 
assert callable(build_session_payload) + + def test_build_session_payload_jupyter(self): + """Test building payload for Jupyter session.""" + from cloudos_cli.interactive_session.interactive_session import build_session_payload + + result = build_session_payload( + name='Test Session', + backend='regular', + instance_type='c5.xlarge', + storage_size=500, + project_id='proj_001' + ) + + assert isinstance(result, dict) + assert 'interactiveSessionConfiguration' in result + assert 'projectId' in result + assert result['projectId'] == 'proj_001' + assert result['interactiveSessionConfiguration']['backend'] == 'regular' + + def test_format_session_creation_table_function_exists(self): + """Test that format_session_creation_table function exists.""" + from cloudos_cli.interactive_session.interactive_session import format_session_creation_table + + assert callable(format_session_creation_table) + + def test_format_session_creation_table_output(self): + """Test formatting session creation output for table display.""" + from cloudos_cli.interactive_session.interactive_session import format_session_creation_table + + session_data = { + '_id': 'session_001', + 'name': 'Test Session', + 'status': 'scheduled', + 'interactiveSessionType': 'awsJupyterNotebook' + } + + result = format_session_creation_table(session_data) + # Should return a string representation + assert isinstance(result, (str, type(None))) or hasattr(result, '__str__') + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) diff --git a/tests/test_interactive_session/test_list_sessions.py b/tests/test_interactive_session/test_list_sessions.py new file mode 100644 index 00000000..0fadf377 --- /dev/null +++ b/tests/test_interactive_session/test_list_sessions.py @@ -0,0 +1,250 @@ +"""Tests for interactive session list command.""" + +import pytest +import json +from click.testing import CliRunner +from cloudos_cli.__main__ import run_cloudos_cli +from unittest import mock +from unittest.mock import patch + + 
+class TestInteractiveSessionCommand: + """Test the interactive session command structure.""" + + def test_interactive_session_command_exists(self): + """Test that the 'interactive-session' command exists.""" + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, ['interactive-session', '--help']) + + # Command should exist and not error out + assert result.exit_code == 0 + assert 'interactive session' in result.output.lower() + + def test_interactive_session_list_command_exists(self): + """Test that the 'interactive-session list' command exists.""" + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, ['interactive-session', 'list', '--help']) + + # Command should exist and show help properly + assert result.exit_code == 0 + assert 'interactive session' in result.output.lower() + assert 'list' in result.output.lower() + + def test_interactive_session_list_has_required_options(self): + """Test that required options are present in list command.""" + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, ['interactive-session', 'list', '--help']) + + assert result.exit_code == 0 + # Check for required options + assert '--apikey' in result.output + assert '--workspace-id' in result.output + # Check for optional filters + assert '--filter-status' in result.output + assert '--limit' in result.output + assert '--page' in result.output + assert '--filter-only-mine' in result.output + assert '--archived' in result.output + assert '--output-format' in result.output + + def test_interactive_session_list_output_format_options(self): + """Test that output format options are correct.""" + runner = CliRunner() + result = runner.invoke(run_cloudos_cli, ['interactive-session', 'list', '--help']) + + assert result.exit_code == 0 + # Check for format options + assert 'stdout' in result.output or 'stdout' in result.output.lower() + assert 'json' in result.output or 'json' in result.output.lower() + assert 'csv' in result.output or 'csv' in 
result.output.lower() + + +class TestInteractiveSessionListIntegration: + """Integration tests for interactive session list command with mocked API.""" + + @pytest.fixture + def runner(self): + """Provide a CliRunner instance.""" + return CliRunner() + + def test_list_sessions_missing_workspace_id(self, runner): + """Test listing sessions without workspace-id from command line. + + Note: If a default profile has workspace_id configured, it will be used + and the command will attempt the API call instead of failing validation. + This test just verifies the command can be invoked. + """ + result = runner.invoke(run_cloudos_cli, [ + 'interactive-session', 'list', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com' + ]) + + # Command should either fail with missing workspace-id error, or attempt + # an API call (if workspace_id is in the default profile) + # We just verify the command was invoked without syntax errors + assert result.exit_code != 0 # Should fail for some reason + + @patch('cloudos_cli.interactive_session.cli.Cloudos') + @patch('cloudos_cli.configure.configure.ConfigurationProfile.load_profile_and_validate_data') + def test_list_sessions_with_valid_params(self, mock_config, mock_cloudos): + """Test listing sessions with valid parameters.""" + runner = CliRunner() + + # Mock the configuration loading + mock_config.return_value = { + 'apikey': 'test_key', + 'cloudos_url': 'http://test.com', + 'workspace_id': 'test_team' + } + + # Mock the Cloudos API call + mock_cloudos_instance = mock.MagicMock() + mock_cloudos.return_value = mock_cloudos_instance + mock_cloudos_instance.get_interactive_session_list.return_value = { + 'sessions': [], + 'pagination_metadata': {'count': 0, 'page': 1, 'limit': 10, 'totalPages': 0} + } + + result = runner.invoke(run_cloudos_cli, [ + 'interactive-session', 'list', + '--apikey', 'test_key', + '--cloudos-url', 'http://test.com', + '--workspace-id', 'test_team' + ]) + + # Even if it fails due to config, we want to 
verify the command was invoked + # Success would mean no exceptions during argument parsing + assert 'No interactive sessions found' in result.output or result.exit_code == 0 + + +class TestInteractiveSessionAPIMethod: + """Unit tests for the get_interactive_session_list API method in Cloudos class.""" + + def test_get_interactive_session_list_method_exists(self): + """Test that the get_interactive_session_list method exists in Cloudos class.""" + from cloudos_cli.clos import Cloudos + + # Check if method exists + assert hasattr(Cloudos, 'get_interactive_session_list') + assert callable(getattr(Cloudos, 'get_interactive_session_list')) + + def test_get_interactive_session_list_signature(self): + """Test that the method has the correct signature.""" + from cloudos_cli.clos import Cloudos + import inspect + + method = getattr(Cloudos, 'get_interactive_session_list') + sig = inspect.signature(method) + params = list(sig.parameters.keys()) + + # Check for required parameters + assert 'self' in params + assert 'team_id' in params + # Check for optional parameters + assert 'page' in params + assert 'limit' in params + assert 'status' in params + assert 'owner_only' in params + assert 'include_archived' in params + assert 'verify' in params + + @patch('cloudos_cli.clos.retry_requests_get') + def test_get_interactive_session_list_api_call(self, mock_get): + """Test that the method makes the correct API call.""" + from cloudos_cli.clos import Cloudos + + # Setup mock response + mock_response = mock.MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + 'sessions': [ + { + '_id': 'session_001', + 'name': 'Test Session', + 'status': 'running', + 'interactiveSessionType': 'awsJupyterNotebook', + 'resources': {'instanceType': 'c5.xlarge'}, + 'totalCostInUsd': 1.50, + 'user': {'name': 'John'} + } + ], + 'paginationMetadata': { + 'Pagination-Count': 1, + 'Pagination-Page': 1, + 'Pagination-Limit': 10 + } + } + mock_get.return_value = mock_response + 
+ # Create Cloudos instance and call method + cl = Cloudos('http://test.com', 'test_key', None) + result = cl.get_interactive_session_list('test_team') + + # Verify API was called + assert mock_get.called + assert 'interactive-sessions' in mock_get.call_args[0][0] + assert result['sessions'][0]['_id'] == 'session_001' + assert result['pagination_metadata']['count'] == 1 + + @patch('cloudos_cli.clos.retry_requests_get') + def test_get_interactive_session_list_with_filters(self, mock_get): + """Test that filters are correctly passed to the API.""" + from cloudos_cli.clos import Cloudos + + # Setup mock response + mock_response = mock.MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + 'sessions': [], + 'paginationMetadata': { + 'Pagination-Count': 0, + 'Pagination-Page': 1, + 'Pagination-Limit': 10 + } + } + mock_get.return_value = mock_response + + # Create Cloudos instance and call method with filters + cl = Cloudos('http://test.com', 'test_key', None) + result = cl.get_interactive_session_list( + 'test_team', + page=2, + limit=20, + status=['running', 'initialising'], + owner_only=True, + include_archived=True + ) + + # Verify API was called with correct parameters + assert mock_get.called + call_args = mock_get.call_args + params = call_args[1]['params'] + + assert params['page'] == 2 + assert params['limit'] == 20 + assert params['onlyOwnerSessions'] == 'true' + assert params['archived.status'] == 'true' + + def test_get_interactive_session_list_validation(self): + """Test that method validates input parameters.""" + from cloudos_cli.clos import Cloudos + + cl = Cloudos('http://test.com', 'test_key', None) + + # Test invalid team_id + with pytest.raises(ValueError): + cl.get_interactive_session_list(None) + + # Test invalid page + with pytest.raises(ValueError): + cl.get_interactive_session_list('test_team', page=0) + + # Test invalid limit + with pytest.raises(ValueError): + cl.get_interactive_session_list('test_team', 
limit=150) + + +if __name__ == '__main__': + pytest.main([__file__, '-v']) +