Commit 191d14e

Datasets csv (#279)

* feat: add csv output
* add PR template
* update csv without --details
* update basename
* docs: update changes, readme and version

1 parent e24a855, commit 191d14e

5 files changed: 238 additions & 78 deletions

Pull request template (new file)

Lines changed: 53 additions & 0 deletions

@@ -0,0 +1,53 @@
+# Overview
+
+> Briefly describe what this PR does.
+
+# JIRA
+
+> If this PR covers more than one ticket, list each related task here with a brief description.
+
+- https://lifebit.atlassian.net/browse/LP-XXXX - adds CSV, JSON outputs
+- https://lifebit.atlassian.net/browse/LP-XXXX - pytests
+- https://lifebit.atlassian.net/browse/LP-XXXX - documentation
+
+# Changes
+
+- Implements X
+- Refactors Y
+- Adds/Removes Z
+
+# Acceptance Criteria
+
+> Add as many scenarios here as there are in the Story.
+
+> Normally these acceptance criteria are tested in an ADAPT workspace in PROD.
+
+<details>
+<summary>Scenario 1 - proof this scenario passes</summary>
+</details>
+
+<details>
+<summary>Scenario 2 - proof this scenario passes</summary>
+</details>
+
+<details>
+<summary>Scenario X - proof this scenario passes</summary>
+</details>
+
+# DEV
+
+> This environment is interchangeable with PROD, for example if the acceptance criteria can only be tested in DEV. If that is the case, please rename this section PROD (or any new environment).
+
+<details>
+<summary>Proof this feature/patch works in this environment</summary>
+</details>
+
+# AZURE
+
+<details>
+<summary>Proof this feature/patch works in this environment</summary>
+</details>
+
+# Interactive Analysis
+
+<details>
+<summary>Proof this feature/patch works in this environment</summary>
+</details>

CHANGELOG.md

Lines changed: 7 additions & 0 deletions

@@ -1,5 +1,12 @@
 ## lifebit-ai/cloudos-cli: changelog

+## v2.73.0 (2025-12-02)
+
+### Feat
+
+- Adds CSV output format for `datasets ls` with or without `--details`
+- Adds PR template
+
 ## v2.72.0 (2025-12-02)

 ### Feat

README.md

Lines changed: 33 additions & 0 deletions

@@ -1634,6 +1634,39 @@ If you require more information on the files and folder listed, you can use the
 - Virtual Name (the file or folder name)
 - Storage Path

+**Output Format Options**
+
+The `datasets ls` command supports different output formats via the `--output-format` option:
+
+- **`stdout` (default)**: displays results in the console with Rich formatting.
+  - Without `--details`: a simple list of file/folder names with color coding (blue underlined for folders).
+  - With `--details`: a Rich-formatted table with all file information.
+- **`csv`**: saves results to a CSV file.
+  - Without `--details`: a CSV with two columns, "Name,Storage Path".
+  - With `--details`: a CSV with the columns "Type, Owner, Size, Size (bytes), Last Updated, Virtual Name, Storage Path".
+
+Examples:
+
+```bash
+# Simple list to console (default)
+cloudos datasets ls Data --profile my_profile
+
+# Detailed table in console
+cloudos datasets ls Data --details --profile my_profile
+
+# Simple CSV output
+cloudos datasets ls Data --profile my_profile --output-format csv
+
+# Detailed CSV output
+cloudos datasets ls Data --details --output-format csv --profile my_profile
+
+# Custom output filename
+cloudos datasets ls Data --details --output-format csv --output-basename my_files --profile my_profile
+```
+
+When using `--output-format csv`, you can optionally set a custom base filename with `--output-basename`. If it is not provided, the default base name `datasets_ls` is used, producing `datasets_ls.csv`.
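For reference, the simple-mode file contains just the two documented columns. A hypothetical example (bucket and file names invented for illustration):

```bash
cat datasets_ls.csv
# Name,Storage Path
# results,s3://my-bucket/results
# counts.csv,s3://my-bucket/data/counts.csv
```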
 #### Move Files

 Relocate files and folders within the same project or across different projects. This is useful for reorganizing data and moving results to appropriate locations.

cloudos_cli/__main__.py

Lines changed: 144 additions & 77 deletions

@@ -2997,6 +2997,16 @@ def run_bash_array_job(ctx,
                     'Details contains "Type", "Owner", "Size", "Last Updated", ' +
                     '"Virtual Name", "Storage Path".'),
               is_flag=True)
+@click.option('--output-format',
+              help=('The desired display for the output, either directly to standard output or saved as a file. ' +
+                    'Default=stdout.'),
+              type=click.Choice(['stdout', 'csv'], case_sensitive=False),
+              default='stdout')
+@click.option('--output-basename',
+              help=('Output file base name to save dataset details. ' +
+                    'Default=datasets_ls'),
+              default='datasets_ls',
+              required=False)
 @click.pass_context
 @with_profile_config(required_params=['apikey', 'workspace_id'])
 def list_files(ctx,

@@ -3008,7 +3018,9 @@ def list_files(ctx,
                project_name,
                profile,
                path,
-               details):
+               details,
+               output_format,
+               output_basename):
     """List contents of a path within a CloudOS workspace dataset."""
     verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert)
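Since `--output-format` is declared with `case_sensitive=False`, click matches the choice case-insensitively; for example (hypothetical invocation):

```bash
# Accepted the same as --output-format csv
cloudos datasets ls Data --output-format CSV --profile my_profile
```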

@@ -3024,89 +3036,144 @@
     try:
         result = datasets.list_folder_content(path)
         contents = result.get("contents") or result.get("datasets", [])
+
         if not contents:
             contents = result.get("files", []) + result.get("folders", [])

-        if details:
-            console = Console(width=None)
-            table = Table(show_header=True, header_style="bold white")
-            table.add_column("Type", style="cyan", no_wrap=True)
-            table.add_column("Owner", style="white")
-            table.add_column("Size", style="magenta")
-            table.add_column("Last Updated", style="green")
-            table.add_column("Virtual Name", style="bold", overflow="fold")
-            table.add_column("Storage Path", style="dim", no_wrap=False, overflow="fold", ratio=2)
-
-            for item in contents:
-                is_folder = "folderType" in item or item.get("isDir", False)
-                type_ = "folder" if is_folder else "file"
-
-                # Enhanced type information
-                if is_folder:
-                    folder_type = item.get("folderType")
-                    if folder_type == "VirtualFolder":
-                        type_ = "virtual folder"
-                    elif folder_type == "S3Folder":
-                        type_ = "s3 folder"
-                    elif folder_type == "AzureBlobFolder":
-                        type_ = "azure folder"
-                    else:
-                        type_ = "folder"
-                else:
-                    # Check if file is managed by Lifebit (user uploaded)
-                    is_managed_by_lifebit = item.get("isManagedByLifebit", False)
-                    if is_managed_by_lifebit:
-                        type_ = "file (user uploaded)"
-                    else:
-                        type_ = "file (virtual copy)"
-
-                user = item.get("user", {})
-                if isinstance(user, dict):
-                    name = user.get("name", "").strip()
-                    surname = user.get("surname", "").strip()
-                else:
-                    name = surname = ""
-                if name and surname:
-                    owner = f"{name} {surname}"
-                elif name:
-                    owner = name
-                elif surname:
-                    owner = surname
-                else:
-                    owner = "-"
-
-                raw_size = item.get("sizeInBytes", item.get("size"))
-                size = format_bytes(raw_size) if not is_folder and raw_size is not None else "-"
-
-                updated = item.get("updatedAt") or item.get("lastModified", "-")
-                filepath = item.get("name", "-")
-
-                if item.get("fileType") == "S3File" or item.get("folderType") == "S3Folder":
-                    bucket = item.get("s3BucketName")
-                    key = item.get("s3ObjectKey") or item.get("s3Prefix")
-                    s3_path = f"s3://{bucket}/{key}" if bucket and key else "-"
-                elif item.get("fileType") == "AzureBlobFile" or item.get("folderType") == "AzureBlobFolder":
-                    account = item.get("blobStorageAccountName")
-                    container = item.get("blobContainerName")
-                    key = item.get("blobName") if item.get("fileType") == "AzureBlobFile" else item.get("blobPrefix")
-                    s3_path = f"az://{account}.blob.core.windows.net/{container}/{key}" if account and container and key else "-"
-                else:
-                    s3_path = "-"
-
-                style = Style(color="blue", underline=True) if is_folder else None
-                table.add_row(type_, owner, size, updated, filepath, s3_path, style=style)
-
-            console.print(table)
-
-        else:
-            console = Console()
-            for item in contents:
-                name = item.get("name", "")
-                is_folder = item.get("folderType") or item.get("isDir")
-                if is_folder:
-                    console.print(f"[blue underline]{name}[/]")
-                else:
-                    console.print(name)
+        # Process items to extract data
+        processed_items = []
+        for item in contents:
+            is_folder = "folderType" in item or item.get("isDir", False)
+            type_ = "folder" if is_folder else "file"
+
+            # Enhanced type information
+            if is_folder:
+                folder_type = item.get("folderType")
+                if folder_type == "VirtualFolder":
+                    type_ = "virtual folder"
+                elif folder_type == "S3Folder":
+                    type_ = "s3 folder"
+                elif folder_type == "AzureBlobFolder":
+                    type_ = "azure folder"
+                else:
+                    type_ = "folder"
+            else:
+                # Check if file is managed by Lifebit (user uploaded)
+                is_managed_by_lifebit = item.get("isManagedByLifebit", False)
+                if is_managed_by_lifebit:
+                    type_ = "file (user uploaded)"
+                else:
+                    type_ = "file (virtual copy)"
+
+            user = item.get("user", {})
+            if isinstance(user, dict):
+                name = user.get("name", "").strip()
+                surname = user.get("surname", "").strip()
+            else:
+                name = surname = ""
+            if name and surname:
+                owner = f"{name} {surname}"
+            elif name:
+                owner = name
+            elif surname:
+                owner = surname
+            else:
+                owner = "-"
+
+            raw_size = item.get("sizeInBytes", item.get("size"))
+            size = format_bytes(raw_size) if not is_folder and raw_size is not None else "-"
+
+            updated = item.get("updatedAt") or item.get("lastModified", "-")
+            filepath = item.get("name", "-")
+
+            if item.get("fileType") == "S3File" or item.get("folderType") == "S3Folder":
+                bucket = item.get("s3BucketName")
+                key = item.get("s3ObjectKey") or item.get("s3Prefix")
+                storage_path = f"s3://{bucket}/{key}" if bucket and key else "-"
+            elif item.get("fileType") == "AzureBlobFile" or item.get("folderType") == "AzureBlobFolder":
+                account = item.get("blobStorageAccountName")
+                container = item.get("blobContainerName")
+                key = item.get("blobName") if item.get("fileType") == "AzureBlobFile" else item.get("blobPrefix")
+                storage_path = f"az://{account}.blob.core.windows.net/{container}/{key}" if account and container and key else "-"
+            else:
+                storage_path = "-"
+
+            processed_items.append({
+                'type': type_,
+                'owner': owner,
+                'size': size,
+                'raw_size': raw_size,
+                'updated': updated,
+                'name': filepath,
+                'storage_path': storage_path,
+                'is_folder': is_folder
+            })
+
+        # Output handling
+        if output_format == 'csv':
+            import csv
+
+            csv_filename = f'{output_basename}.csv'
+
+            if details:
+                # CSV with all details
+                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
+                    fieldnames = ['Type', 'Owner', 'Size', 'Size (bytes)', 'Last Updated', 'Virtual Name', 'Storage Path']
+                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                    writer.writeheader()
+
+                    for item in processed_items:
+                        writer.writerow({
+                            'Type': item['type'],
+                            'Owner': item['owner'],
+                            'Size': item['size'],
+                            'Size (bytes)': item['raw_size'] if item['raw_size'] is not None else '',
+                            'Last Updated': item['updated'],
+                            'Virtual Name': item['name'],
+                            'Storage Path': item['storage_path']
+                        })
+            else:
+                # CSV with just names
+                with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
+                    writer = csv.writer(csvfile)
+                    writer.writerow(['Name', 'Storage Path'])
+                    for item in processed_items:
+                        writer.writerow([item['name'], item['storage_path']])
+
+            click.secho(f'\nDatasets list saved to: {csv_filename}', fg='green', bold=True)
+
+        else:  # stdout
+            if details:
+                console = Console(width=None)
+                table = Table(show_header=True, header_style="bold white")
+                table.add_column("Type", style="cyan", no_wrap=True)
+                table.add_column("Owner", style="white")
+                table.add_column("Size", style="magenta")
+                table.add_column("Last Updated", style="green")
+                table.add_column("Virtual Name", style="bold", overflow="fold")
+                table.add_column("Storage Path", style="dim", no_wrap=False, overflow="fold", ratio=2)
+
+                for item in processed_items:
+                    style = Style(color="blue", underline=True) if item['is_folder'] else None
+                    table.add_row(
+                        item['type'],
+                        item['owner'],
+                        item['size'],
+                        item['updated'],
+                        item['name'],
+                        item['storage_path'],
+                        style=style
+                    )
+
+                console.print(table)
+
+            else:
+                console = Console()
+                for item in processed_items:
+                    if item['is_folder']:
+                        console.print(f"[blue underline]{item['name']}[/]")
+                    else:
+                        console.print(item['name'])

     except Exception as e:
         raise ValueError(f"Failed to list files for project '{project_name}': {str(e)}")
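On a successful CSV export, the command confirms the written file via the `click.secho` call above; a hypothetical session using the default `--output-basename`:

```bash
cloudos datasets ls Data --details --output-format csv --profile my_profile
# Prints in green:
#
# Datasets list saved to: datasets_ls.csv
```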

cloudos_cli/_version.py

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-__version__ = '2.72.0'
+__version__ = '2.73.0'
