diff --git a/THIRD-PARTY b/THIRD-PARTY deleted file mode 100644 index 7da5928..0000000 --- a/THIRD-PARTY +++ /dev/null @@ -1,264 +0,0 @@ -** NVIDIA nvidia-resiliency-ext; version 0.5.0 -- https://github.com/NVIDIA/nvidia-resiliency-ext - -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and -distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright -owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all other entities -that control, are controlled by, or are under common control with that entity. -For the purposes of this definition, "control" means (i) the power, direct or -indirect, to cause the direction or management of such entity, whether by -contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising -permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, including -but not limited to software source code, documentation source, and configuration -files. - -"Object" form shall mean any form resulting from mechanical transformation or -translation of a Source form, including but not limited to compiled object code, -generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made -available under the License, as indicated by a copyright notice that is included -in or attached to the work (an example is provided in the Appendix below). 
- -"Derivative Works" shall mean any work, whether in Source or Object form, that -is based on (or derived from) the Work and for which the editorial revisions, -annotations, elaborations, or other modifications represent, as a whole, an -original work of authorship. For the purposes of this License, Derivative Works -shall not include works that remain separable from, or merely link (or bind by -name) to the interfaces of, the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including the original version -of the Work and any modifications or additions to that Work or Derivative Works -thereof, that is intentionally submitted to Licensor for inclusion in the Work -by the copyright owner or by an individual or Legal Entity authorized to submit -on behalf of the copyright owner. For the purposes of this definition, -"submitted" means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, and -issue tracking systems that are managed by, or on behalf of, the Licensor for -the purpose of discussing and improving the Work, but excluding communication -that is conspicuously marked or otherwise designated in writing by the copyright -owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf -of whom a Contribution has been received by Licensor and subsequently -incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of this -License, each Contributor hereby grants to You a perpetual, worldwide, non- -exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, -prepare Derivative Works of, publicly display, publicly perform, sublicense, and -distribute the Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of this License, -each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no- -charge, royalty-free, irrevocable (except as stated in this section) patent -license to make, have made, use, offer to sell, sell, import, and otherwise -transfer the Work, where such license applies only to those patent claims -licensable by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) with the Work -to which such Contribution(s) was submitted. If You institute patent litigation -against any entity (including a cross-claim or counterclaim in a lawsuit) -alleging that the Work or a Contribution incorporated within the Work -constitutes direct or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate as of the date -such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the Work or -Derivative Works thereof in any medium, with or without modifications, and in -Source or Object form, provided that You meet the following conditions: - - (a) You must give any other recipients of the Work or Derivative Works a -copy of this License; and - - (b) You must cause any modified files to carry prominent notices stating -that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works that You -distribute, all copyright, patent, trademark, and attribution notices from the -Source form of the Work, excluding those notices that do not pertain to any part -of the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its distribution, -then any Derivative Works that You distribute must include a readable copy of -the attribution notices contained within such NOTICE file, excluding those -notices that do not pertain to any part of the Derivative Works, in at least one -of the following places: within a 
NOTICE text file distributed as part of the -Derivative Works; within the Source form or documentation, if provided along -with the Derivative Works; or, within a display generated by the Derivative -Works, if and wherever such third-party notices normally appear. The contents of -the NOTICE file are for informational purposes only and do not modify the -License. You may add Your own attribution notices within Derivative Works that -You distribute, alongside or as an addendum to the NOTICE text from the Work, -provided that such additional attribution notices cannot be construed as -modifying the License. - - You may add Your own copyright statement to Your modifications and may -provide additional or different license terms and conditions for use, -reproduction, or distribution of Your modifications, or for any such Derivative -Works as a whole, provided Your use, reproduction, and distribution of the Work -otherwise complies with the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, any -Contribution intentionally submitted for inclusion in the Work by You to the -Licensor shall be under the terms and conditions of this License, without any -additional terms or conditions. Notwithstanding the above, nothing herein shall -supersede or modify the terms of any separate license agreement you may have -executed with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade names, -trademarks, service marks, or product names of the Licensor, except as required -for reasonable and customary use in describing the origin of the Work and -reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in -writing, Licensor provides the Work (and each Contributor provides its -Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied, including, without limitation, any warranties -or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any risks -associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, whether in -tort (including negligence), contract, or otherwise, unless required by -applicable law (such as deliberate and grossly negligent acts) or agreed to in -writing, shall any Contributor be liable to You for damages, including any -direct, indirect, special, incidental, or consequential damages of any character -arising as a result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, work stoppage, -computer failure or malfunction, or any and all other commercial damages or -losses), even if such Contributor has been advised of the possibility of such -damages. - -9. Accepting Warranty or Additional Liability. While redistributing the Work or -Derivative Works thereof, You may choose to offer, and charge a fee for, -acceptance of support, warranty, indemnity, or other liability obligations -and/or rights consistent with this License. However, in accepting such -obligations, You may act only on Your own behalf and on Your sole -responsibility, not on behalf of any other Contributor, and only if You agree to -indemnify, defend, and hold each Contributor harmless for any liability incurred -by, or claims asserted against, such Contributor by reason of your accepting any -such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -** Meta PyTorch; version release 2.6 -- https://github.com/pytorch/pytorch - -From PyTorch: - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) -Copyright (c) 2006 Idiap Research Institute (Samy Bengio) -Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) - -From Caffe2: - -Copyright (c) 2016-present, Facebook Inc. All rights reserved. - -All contributions by Facebook: -Copyright (c) 2016 Facebook Inc. - -All contributions by Google: -Copyright (c) 2015 Google Inc. -All rights reserved. - -All contributions by Yangqing Jia: -Copyright (c) 2015 Yangqing Jia -All rights reserved. - -All contributions by Kakao Brain: -Copyright 2019-2020 Kakao Brain - -All contributions by Cruise LLC: -Copyright (c) 2022 Cruise LLC. -All rights reserved. - -All contributions by Tri Dao: -Copyright (c) 2024 Tri Dao. -All rights reserved. 
- -All contributions by Arm: -Copyright (c) 2021, 2023-2025 Arm Limited and/or its affiliates - -All contributions from Caffe: -Copyright(c) 2013, 2014, 2015, the respective contributors -All rights reserved. - -All other contributions: -Copyright(c) 2015, 2016 the respective contributors -All rights reserved. - -Caffe2 uses a copyright model similar to Caffe: each contributor holds -copyright over their contributions to Caffe2. The project versioning records -all such contribution and copyright details. If a contributor wants to further -mark their specific copyright on a particular contribution, they should -indicate their copyright solely in the commit message of the change when it is -committed. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America - and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -* For NVIDIA nvidia-resiliency-ext see also this required NOTICE: - Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -* For Meta PyTorch see also this required NOTICE: - Copyright (c) Meta Platforms, Inc. and affiliates \ No newline at end of file diff --git a/examples/gpt_oss/launch/peft_gpt_oss_120b_checkpointless_p5.yaml b/examples/gpt_oss/launch/peft_gpt_oss_120b_checkpointless_p5.yaml index 0e41575..f18b23e 100644 --- a/examples/gpt_oss/launch/peft_gpt_oss_120b_checkpointless_p5.yaml +++ b/examples/gpt_oss/launch/peft_gpt_oss_120b_checkpointless_p5.yaml @@ -17,12 +17,12 @@ metadata: labels: app.kubernetes.io/name: hyperpod app.kubernetes.io/managed-by: kustomize - name: &jobname htzhong-gpt-120b-lora-checkpointless + name: &jobname username-gpt-120b-lora-checkpointless annotations: - user: &user htzhong + user: &user username region: &region ap-south-1 - workspace: &workspace /data/htzhong/ - output_dir: &output_dir /data/htzhong/output + workspace: &workspace /data/username/ + output_dir: &output_dir /data/username/output postfix: &postfix checkpointless spec: nprocPerNode: "8" @@ -36,9 +36,7 @@ spec: beta.kubernetes.io/instance-type: ml.p5.48xlarge containers: - name: ptjob - # image: "855988369404.dkr.ecr.us-west-2.amazonaws.com/haitao-test:hyperpod-checkpointless-training_v1.0.0" - # image: "556809692997.dkr.ecr.us-east-2.amazonaws.com/hyperpod-checkpointless-training:v1.0.0" - image: 
"839249767557.dkr.ecr.us-west-2.amazonaws.com/hyperpod-checkpointless-training:v1.0.0" + image: "" imagePullPolicy: Always securityContext: privileged: true @@ -136,11 +134,9 @@ spec: ++callbacks.3.test_fault_config.fault_prob_random=0 \ ++callbacks.3.test_fault_config.fault_ranks='[8]' \ ++callbacks.3.test_fault_config.steps_before_fault=10 \ - resume.restore_config=null \ - dataset.dataset_path='/data/datasets/llama3-4m/train' \ - +dataset.val_dataset_path='/data/datasets/llama3/val' \ - ~trainer.val_check_interval \ - trainer.limit_val_batches=0.0 \ + resume.restore_config.path='' \ + dataset.dataset_path='' \ + +dataset.val_dataset_path='' \ data.global_batch_size=16 2>&1 | tee "${LOGDIR}/train-${POD_NAME}.log" volumeMounts: - name: persistent-storage @@ -155,4 +151,4 @@ spec: emptyDir: medium: Memory runPolicy: - cleanPodPolicy: "All" \ No newline at end of file + cleanPodPolicy: "All" diff --git a/requirements.txt b/requirements.txt index 4fb7996..47728e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -423,7 +423,7 @@ typing-extensions==4.15.0 # typing-inspection typing-inspection==0.4.2 # via pydantic -urllib3==2.5.0 +urllib3==2.6.0 # via # botocore # requests diff --git a/src/hyperpod_checkpointless_training/inprocess/abort.py b/src/hyperpod_checkpointless_training/inprocess/abort.py index 219f5e6..8572b37 100644 --- a/src/hyperpod_checkpointless_training/inprocess/abort.py +++ b/src/hyperpod_checkpointless_training/inprocess/abort.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. 
See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import abc import concurrent.futures diff --git a/src/hyperpod_checkpointless_training/inprocess/compose.py b/src/hyperpod_checkpointless_training/inprocess/compose.py index ce9f8b5..f4748e2 100644 --- a/src/hyperpod_checkpointless_training/inprocess/compose.py +++ b/src/hyperpod_checkpointless_training/inprocess/compose.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import inspect import warnings diff --git a/src/hyperpod_checkpointless_training/inprocess/exception.py b/src/hyperpod_checkpointless_training/inprocess/exception.py index a139c29..393e428 100644 --- a/src/hyperpod_checkpointless_training/inprocess/exception.py +++ b/src/hyperpod_checkpointless_training/inprocess/exception.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. 
A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import inspect import logging diff --git a/src/hyperpod_checkpointless_training/inprocess/finalize.py b/src/hyperpod_checkpointless_training/inprocess/finalize.py index b1a1df7..413a151 100644 --- a/src/hyperpod_checkpointless_training/inprocess/finalize.py +++ b/src/hyperpod_checkpointless_training/inprocess/finalize.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import abc import datetime diff --git a/src/hyperpod_checkpointless_training/inprocess/health_check.py b/src/hyperpod_checkpointless_training/inprocess/health_check.py index a1a36e8..b4239e0 100644 --- a/src/hyperpod_checkpointless_training/inprocess/health_check.py +++ b/src/hyperpod_checkpointless_training/inprocess/health_check.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import abc import datetime diff --git a/src/hyperpod_checkpointless_training/inprocess/param_utils.py b/src/hyperpod_checkpointless_training/inprocess/param_utils.py index da85710..1d01589 100644 --- a/src/hyperpod_checkpointless_training/inprocess/param_utils.py +++ b/src/hyperpod_checkpointless_training/inprocess/param_utils.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. 
Modifications © Amazon.com import ast import inspect diff --git a/src/hyperpod_checkpointless_training/inprocess/tools/__init__.py b/src/hyperpod_checkpointless_training/inprocess/tools/__init__.py index 2a3ff83..f2b7426 100644 --- a/src/hyperpod_checkpointless_training/inprocess/tools/__init__.py +++ b/src/hyperpod_checkpointless_training/inprocess/tools/__init__.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com from . import inject_fault as inject_fault from . import startup_overhead_tracer as startup_overhead_tracer diff --git a/src/hyperpod_checkpointless_training/inprocess/tools/inject_fault.py b/src/hyperpod_checkpointless_training/inprocess/tools/inject_fault.py index 449ac8f..b2250e0 100644 --- a/src/hyperpod_checkpointless_training/inprocess/tools/inject_fault.py +++ b/src/hyperpod_checkpointless_training/inprocess/tools/inject_fault.py @@ -1,20 +1,4 @@ -# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Modifications © Amazon.com +# Original Copyright (c), NVIDIA CORPORATION. Modifications © Amazon.com import ctypes import datetime diff --git a/src/hyperpod_checkpointless_training/inprocess/wrap.py b/src/hyperpod_checkpointless_training/inprocess/wrap.py index 025b5c6..dcf4501 100644 --- a/src/hyperpod_checkpointless_training/inprocess/wrap.py +++ b/src/hyperpod_checkpointless_training/inprocess/wrap.py @@ -1,17 +1,4 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -# Portions taken from NVIDIA nvidia-resiliency-ext, Copyright Nvidia Corporation +# Original Copyright (c), NVIDIA CORPORATION. 
Modifications © Amazon.com import functools import gc diff --git a/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_manager.py b/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_manager.py index 1983671..6d42f58 100644 --- a/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_manager.py +++ b/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_manager.py @@ -112,8 +112,6 @@ def _traverse_obj(path: OBJ_PATH, value: STATE_DICT_ITEM) -> None: _traverse_obj((str(key),), value) -"""Original Copyright Meta Platforms, Inc. and affiliates under the BSD License""" -"""Modifications Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved""" def flatten_state_dict( state_dict: STATE_DICT_TYPE, ) -> tuple[STATE_DICT_TYPE, FLATTEN_MAPPING]: diff --git a/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_transform_callback.py b/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_transform_callback.py index 45c5696..e0c88f8 100644 --- a/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_transform_callback.py +++ b/src/hyperpod_checkpointless_training/nemo_plugins/checkpoint_transform_callback.py @@ -1,16 +1,3 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- from lightning.pytorch.callbacks import Callback from typing_extensions import override @@ -49,4 +36,4 @@ def get_peft_callback(self, trainer): for callback in trainer.callbacks: if isinstance(callback, PEFT): return callback - return None + return None \ No newline at end of file diff --git a/tests/nemo_plugins/unit_test/test_checkpoint_transform_callback.py b/tests/nemo_plugins/unit_test/test_checkpoint_transform_callback.py index 265803a..ec3183b 100644 --- a/tests/nemo_plugins/unit_test/test_checkpoint_transform_callback.py +++ b/tests/nemo_plugins/unit_test/test_checkpoint_transform_callback.py @@ -1,16 +1,3 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - import pytest from unittest.mock import Mock, MagicMock import lightning.pytorch as pl