diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bb39455..07b3eaf3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,11 +13,11 @@ jobs: test-deploy-book: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 # Install dependencies - name: Set up Python 3.12 - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.12 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 50f4dfeb..cac2211d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -14,11 +14,11 @@ jobs: deploy-book: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 # Install dependencies - name: Set up Python 3.12 - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.12 diff --git a/.github/workflows/modules_ci.yaml b/.github/workflows/modules_ci.yaml index 4d9d0f4a..0c813de9 100644 --- a/.github/workflows/modules_ci.yaml +++ b/.github/workflows/modules_ci.yaml @@ -14,7 +14,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install Lmod dependencies run: | diff --git a/containers/ollama/build_container.sh b/containers/ollama/build_container.sh index e5815ee0..53db8a66 100755 --- a/containers/ollama/build_container.sh +++ b/containers/ollama/build_container.sh @@ -4,7 +4,7 @@ # TODO: MP should pull a CUDA enabled version instead? 
# specify version -VERSION="0.13.2" +VERSION="0.17.7" TAG=${VERSION} IMAGE_NAME="ollama" MODULE_FOLDER="testing/ollama" diff --git a/docs/_toc.yml b/docs/_toc.yml index e5dd09b5..a1e9cdd8 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -5,6 +5,7 @@ defaults: parts: - caption: Announcements chapters: + - file: announcements/maintenance_sb26.md - file: work_deprecation.md - caption: Getting Started chapters: diff --git a/docs/announcements/maintenance_sb26.md b/docs/announcements/maintenance_sb26.md new file mode 100644 index 00000000..3f7e158c --- /dev/null +++ b/docs/announcements/maintenance_sb26.md @@ -0,0 +1,42 @@ +# HPC Maintenance, Spring Break 2026 (March 17-19) + +All HPC and related services will be taken offline for maintenance during spring break. + +## SuperPOD Maintenance + +The SuperPOD will be taken offline for maintenance between 6AM, Tuesday, March 17 and 6PM Thursday, March 19. It will not be possible to access the SuperPOD and no jobs will be allowed to run while maintenance is ongoing. The primary purpose of the maintenance is to implement vendor recommended updates to the $SCRATCH storage system to improve redundancy and reliability. + +## M3 Maintenance + +M3 will be taken offline for maintenance between 6AM, Wednesday, March 18 and 6PM Thursday, March 19. It will not be possible to access M3, the SuperPOD, Open OnDemand, or any HPC storage. Jobs will not be allowed to run while maintenance is ongoing. The primary purpose of this maintenance is to address recent performance issues that require components of the storage systems to be restarted. During this time, the Open OnDemand web portal will also be updated. + +## Open OnDemand Maintenance (https://hpc.smu.edu) + +The Open OnDemand web portal will be updated for security and bug fixes. The web portal runs on M3, so it will remain offline until the M3 maintenance is completed. 
+ +## ColdFront Downtime (https://hpcaccess.smu.edu) + +ColdFront relies on services running on M3 and the SuperPOD, so it will be taken offline while the above maintenance is occurring. Any allocations that are scheduled to expire during the maintenance will automatically be extended by 7 days. + +## New Login Node Resource Limits + +Resource limits will be added to all HPC login nodes the week of March 16. + +Login Node Memory limits: + +- Users utilizing more than 14GB of RAM on login nodes will automatically have their processes throttled (slowed down) +- Users utilizing more than 16GB of RAM on login nodes will automatically have their processes terminated + +Login Node CPU Limits: + +- Users will be limited to 400% CPU usage on login nodes (the equivalent of fully utilizing 4 cores.) + +These limits are being implemented to maintain the stability of the system for all users. Running tasks on login nodes remains forbidden and tasks found running on login nodes may be terminated without notice. If you need assistance submitting jobs to the HPC systems, please open a help ticket by emailing help@smu.edu with “[HPC]” included in the subject line or attend one of the regularly scheduled office hours and support sessions. + +## Reminder about changes to DUO and Microsoft Authenticator + +Most SMU services and logins have transitioned to Microsoft Authenticator. This includes the Open OnDemand web portal (https://hpc.smu.edu) and ColdFront (https://hpcaccess.smu.edu). Please see https://www.smu.edu/oit/services/microsoft-authenticator for more details. Access to HPC systems using SSH will continue to require DUO authentication. Microsoft Authenticator will not work for SSH login sessions. We recommend that you keep the DUO app on your phone or other devices if you plan to use HPC services. + +## Questions or Concerns + +Please [contact us](../about/contact.md) if you have any questions or concerns about the scheduled maintenance. 
diff --git a/docs/policies/policies.md b/docs/policies/policies.md index 31c29673..ee211e97 100644 --- a/docs/policies/policies.md +++ b/docs/policies/policies.md @@ -11,6 +11,29 @@ when using HPC resources. This includes, but is not limited to, Please review the [ODSRCI Guidelines](https://www.smu.edu/provost/odonnell-institute/hpc/guidelines) +## Data Restrictions + +Please review [SMU's Institutional Data Governance +](https://www.smu.edu/policy/8-information-technology/8-6-institutional-data-governance). + +SMU's HPC systems are generally not approved for use with restricted or confidential data. +This includes, but is not limited to: + +- personally identifiable data +- protected health information +- FERPA protected data + +In most cases, unpublished research is classified as private data according to SMU policies. +Private data can generally be used on the HPC systems. +However, SMU's HPC system is a research and academic tool and may lack adequate protections for some types of +private data. + +Data classified as public can always be used on the system as long as owning, storing, or using the data does +not violate other SMU policies, contractual agreements, laws, or other regulations. + +Please consult with [SMU's Office of Research and Innovation (ORI)](https://www.smu.edu/provost/research) +to ensure compliance before utilizing HPC resources. + ## Purge Policy Data stored in `$SCRATCH` on M3 or the SuperPod has a time based quota. @@ -27,6 +50,24 @@ Storage space is also limited and current and active usage is prioritized. Please [contact us](../about/contact.md) to discuss needs and potential options. ::: +## Login Nodes + +Running code, tasks, and persistent processes on login nodes is forbidden. The login nodes +are a shared environment and running intensive tasks can make the entire system unstable for +all users. Tasks running on login nodes may be terminated by system administrators to preserve +system stability. 
+ +Some examples of acceptable use of login nodes: + +- Submitting and monitoring jobs. +- Editing or viewing files with text editors, IDEs, or similar. +- Compiling code (compilation tasks that are expected to take a long time or + require extensive resources should be run inside of a job) +- Building Python environments, such as with Conda, UV, or Venv. +- Downloading files (for large data transfers, please request access to the data transfer nodes) + +If you need assistance submitting jobs, please [contact us](../about/contact.md). + ## Account and Account Password Sharing Policy No two individuals should share the same HPC account, nor diff --git a/modules/m3/applications/testing/ollama/0.13.2.lua b/modules/m3/applications/testing/ollama/0.13.2.lua new file mode 100644 index 00000000..b10b0acc --- /dev/null +++ b/modules/m3/applications/testing/ollama/0.13.2.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.13.2 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.13.2") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") +whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.13.2.sif' + +setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/hpc_docs/utils/ollama_scripts/ollama.sh") + diff --git a/modules/m3/applications/testing/ollama/0.15.1.lua b/modules/m3/applications/testing/ollama/0.15.1.lua new file mode 100644 index 00000000..9782b81e --- /dev/null +++ b/modules/m3/applications/testing/ollama/0.15.1.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.15.1 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral 
directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.15.1") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") +whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.15.1.sif' + +setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/apps/ollama/helper_scripts/ollama.sh") + diff --git a/modules/m3/applications/testing/ollama/0.17.7.lua b/modules/m3/applications/testing/ollama/0.17.7.lua new file mode 100644 index 00000000..69334dad --- /dev/null +++ b/modules/m3/applications/testing/ollama/0.17.7.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.17.7 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.17.7") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") +whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.17.7.sif' + +setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/apps/ollama/helper_scripts/ollama.sh") + diff --git a/modules/mp/apps/testing/ollama/0.13.2.lua b/modules/mp/apps/testing/ollama/0.13.2.lua new file mode 100644 index 00000000..b10b0acc --- /dev/null +++ b/modules/mp/apps/testing/ollama/0.13.2.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.13.2 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.13.2") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") 
+whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.13.2.sif' + +setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/hpc_docs/utils/ollama_scripts/ollama.sh") + diff --git a/modules/mp/apps/testing/ollama/0.15.1.lua b/modules/mp/apps/testing/ollama/0.15.1.lua new file mode 100644 index 00000000..9782b81e --- /dev/null +++ b/modules/mp/apps/testing/ollama/0.15.1.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.15.1 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.15.1") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") +whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.15.1.sif' + +setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/apps/ollama/helper_scripts/ollama.sh") + diff --git a/modules/mp/apps/testing/ollama/0.17.7.lua b/modules/mp/apps/testing/ollama/0.17.7.lua new file mode 100644 index 00000000..69334dad --- /dev/null +++ b/modules/mp/apps/testing/ollama/0.17.7.lua @@ -0,0 +1,22 @@ + +help([[ +Name: Ollama +Version: 0.17.7 +Website: https://ollama.com/ + +Ollama is an open-source tool that allows you to run large language models (LLMs) like Llama 3 and Mistral directly on your local machine + +]]) +whatis("Name: Ollama") +whatis("Version: ollama:0.17.7") +whatis("Category: Ollama") +whatis("URL: https://hub.docker.com/r/ollama/ollama") +whatis("Description: Provides access to Ollama through a container built with Apptainer") +family("Ollama") + +always_load('apptainer') +local sif_file = '/hpc/m3/containers/ollama/ollama_0.17.7.sif' + 
+setenv('CONTAINER_IMAGE', sif_file) +source_sh("bash", "/hpc/m3/apps/ollama/helper_scripts/ollama.sh") + diff --git a/motd/m3/cli_motd.txt b/motd/m3/cli_motd.txt index 46b6cb19..d173edd9 100644 --- a/motd/m3/cli_motd.txt +++ b/motd/m3/cli_motd.txt @@ -27,21 +27,30 @@ $SCRATCH | 60 days | Temporary scratch space IMPORTANT ANNOUNCEMENTS: -Reminder: running jobs now requires being on an active allocation in ColdFront (https://hpcaccess.smu.edu). - See the documentation for running jobs: https://southernmethodistuniversity.github.io/hpc_docs/coldfront/running_jobs.html -==================== +===================== $WORK is End of Life! -==================== +===================== As of August 15, 2025 $WORK is no longer supported. Any data remaining in $WORK should be moved as soon as possible. -$WORK directories are subject to be deprovisioned. +$WORK directories are subject to be deprovisioned (permanently deleted). Please contact help@smu.edu with [HPC] in the subject line with any questions or concerns. +=========== +Maintenance +=========== + +Maintenance on M3 is scheduled between 6AM, Wednesday, March 18 and +6PM Thursday, March 19. It will not be possible to access the system or +run jobs during this time. + +For more details see: +https://southernmethodistuniversity.github.io/hpc_docs/announcements/maintenance_sb26.html + ******************************************************************************** diff --git a/motd/m3/ood_announcements/announcement.yml b/motd/m3/ood_announcements/announcement.yml index d8ff35e5..30221b72 100644 --- a/motd/m3/ood_announcements/announcement.yml +++ b/motd/m3/ood_announcements/announcement.yml @@ -1,11 +1,10 @@ type: warning dismissible: false msg: | - Beginning February 25, 2026, SMU will be transitioning from Duo to Microsoft Authenticator for most multi-factor authentication (MFA). + M3 will be taken offline for scheduled maintenance between 6AM, Wednesday, March 18 and 6PM Thursday, March 19. 
+ While M3 maintenance is ongoing, the Open OnDemand web portal (this site) will be unavailable. - Microsoft Authenticator will be required to access the HPC OnDemand web portal and the ColdFront account management platform. SSH access to the HPC systems will continue to require DUO. + For more information, please see the [maintenance announcement](https://southernmethodistuniversity.github.io/hpc_docs/announcements/maintenance_sb26.html). - For more information or to [opt-in early](https://smu.edu/passwordless), please visit the [Microsoft Authenticator service page](https://www.smu.edu/oit/services/microsoft-authenticator). - - If you need assistance, please contact the [IT Help Desk](https://www.smu.edu/oit/help). + If you have any questions or concerns, please contact the [IT Help Desk](https://www.smu.edu/oit/help). diff --git a/motd/mp/cli_motd.txt b/motd/mp/cli_motd.txt new file mode 100644 index 00000000..37429307 --- /dev/null +++ b/motd/mp/cli_motd.txt @@ -0,0 +1,57 @@ + + WELCOME TO THE + _____ _____ ____ _____ + / ____| | __ \ / __ \| __ \ + | (___ _ _ _ __ ___ _ __| |__) | | | | | | | + \___ \| | | | '_ \ / _ \ '__| ___/| | | | | | | + ____) | |_| | |_) | __/ | | | | |__| | |__| | + |_____/ \__,_| .__/ \___|_| |_| \____/|_____/ + | | + |_| +******************************************************************************** + +Documentation: https://southernmethodistuniversity.github.io/hpc_docs +Help: help@smu.edu with "[HPC]" in subject line + +Storage Locations: + +Variable or Path | Quota | Usage +---------------- | ------- | --------------------------------------------------- +$HOME | 200 GB | Home directory, backed up +$WORK | 0 | End of Life on August 15, 2025 +$SCRATCH | 60 days | Temporary scratch space +/projects | varies | ColdFront storage allocations + +*Do not* use login nodes or $HOME for calculations + +******************************************************************************** + +IMPORTANT ANNOUNCEMENTS: + +See the documentation for 
running jobs: +https://southernmethodistuniversity.github.io/hpc_docs/coldfront/running_jobs.html + + +===================== +$WORK is End of Life! +===================== + +As of August 15, 2025 $WORK is no longer supported. +Any data remaining in $WORK should be moved as soon as possible. +$WORK directories are subject to be deprovisioned (permanently deleted). + +Please contact help@smu.edu with [HPC] in the subject line with any questions +or concerns. + +=========== +Maintenance +=========== + +Maintenance on SuperPOD is scheduled between 6AM, Tuesday, March 17 and +6PM Thursday, March 19. It will not be possible to access the system or +run jobs during this time. + +For more details see: +https://southernmethodistuniversity.github.io/hpc_docs/announcements/maintenance_sb26.html + +********************************************************************************