-
Notifications
You must be signed in to change notification settings - Fork 238
Add NeMo Conversion Scripts to Puzzletron #784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c534473
c57d21e
14263d3
ab5d34b
40e01b6
19bf6be
b4d9d52
350c43c
fc2f8a3
ef36626
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import argparse | ||
| import os | ||
| from pathlib import Path | ||
| from typing import Any | ||
|
|
||
| from nemo.collections import llm | ||
|
|
||
| from modelopt.torch.puzzletron.export.MCore.llama_nemotron import ( | ||
| PuzzletronLlamaNemotronModel, | ||
| PuzzletronNemotronModelConfig, | ||
| ) | ||
|
|
||
|
|
||
def convert_model(
    hf_model_path_local: str, output_path_nemo_local: str, overwrite: bool = False
) -> Any:
    """Import a Puzzletron HuggingFace checkpoint into NeMo format.

    Builds a ``PuzzletronLlamaNemotronModel`` and hands it to
    ``llm.import_ckpt`` together with an ``hf://``-prefixed source path.

    Args:
        hf_model_path_local: Path to the input Puzzletron HuggingFace model directory.
        output_path_nemo_local: Path where the converted Puzzletron NeMo model will be saved.
        overwrite: Whether to overwrite an existing output directory.

    Returns:
        Whatever ``llm.import_ckpt`` returns; it is printed as the saved model location.
    """
    # NOTE(review): the config *class* (not an instance) is passed here -- confirm
    # that this is what PuzzletronLlamaNemotronModel expects.
    puzzletron_model = PuzzletronLlamaNemotronModel(config=PuzzletronNemotronModelConfig)

    # NOTE: API call to import_ckpt is here: https://github.com/NVIDIA-NeMo/NeMo/blob/294ddff187f68c055d87ffe9400e65975b38693d/nemo/collections/llm/api.py#L888
    call_summary = (
        f"calling import_ckpt with model: {puzzletron_model}, "
        f"source: {hf_model_path_local}, "
        f"output_path: {output_path_nemo_local}, "
        f"overwrite: {overwrite}"
    )
    print(call_summary)

    # import_ckpt receives the source as an "hf://" URI and the destination as a Path.
    source_uri = "hf://" + hf_model_path_local
    destination = Path(output_path_nemo_local)
    saved_path = llm.import_ckpt(
        model=puzzletron_model,
        source=source_uri,
        output_path=destination,
        overwrite=overwrite,
    )

    print(f"Model saved to {saved_path}")
    return saved_path
|
|
||
|
|
||
def main() -> None:
    """Command-line entry point: convert a Puzzletron HuggingFace model to NeMo.

    Parses ``--input-ckpt-path``/``-i``, ``--output-ckpt-path``/``-o`` and
    ``--overwrite``, validates the input path, makes sure the parent directory
    of the output path exists, and then calls ``convert_model``.

    Raises:
        FileNotFoundError: If the input checkpoint path does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Convert Puzzletron HuggingFace model to NeMo format"
    )
    parser.add_argument(
        "--input-ckpt-path",
        "-i",
        type=str,
        required=True,
        help="Path to the input Puzzletron HuggingFace model directory",
    )
    parser.add_argument(
        "--output-ckpt-path",
        "-o",
        type=str,
        required=True,
        help="Path where the converted Puzzletron NeMo model will be saved",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        default=False,
        help="Whether to overwrite existing output directory (default: False)",
    )

    args = parser.parse_args()

    # Validate input path
    if not os.path.exists(args.input_ckpt_path):
        raise FileNotFoundError(f"Input model path does not exist: {args.input_ckpt_path}")

    # Create the parent directory of the output path if it doesn't exist.
    # A bare name such as "out" has an empty dirname, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(args.output_ckpt_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Converting model from {args.input_ckpt_path} to {args.output_ckpt_path}")
    convert_model(args.input_ckpt_path, args.output_ckpt_path, args.overwrite)
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,96 @@ | ||||||||||||||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||||||||||||||
| # SPDX-License-Identifier: Apache-2.0 | ||||||||||||||
| # | ||||||||||||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||||
| # you may not use this file except in compliance with the License. | ||||||||||||||
| # You may obtain a copy of the License at | ||||||||||||||
| # | ||||||||||||||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||||
| # | ||||||||||||||
| # Unless required by applicable law or agreed to in writing, software | ||||||||||||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||||
| # See the License for the specific language governing permissions and | ||||||||||||||
| # limitations under the License. | ||||||||||||||
|
|
||||||||||||||
| import argparse | ||||||||||||||
| import os | ||||||||||||||
| from pathlib import Path | ||||||||||||||
| from typing import Any | ||||||||||||||
|
|
||||||||||||||
| from nemo.collections import llm | ||||||||||||||
|
|
||||||||||||||
| from modelopt.torch.puzzletron.tools.checkpoint_utils_hf import copy_deci_lm_hf_code | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
def convert_model(
    nemo_model_path_local: str, output_path_hf_local: str, overwrite: bool = False
) -> Any:
    """Export a NeMo checkpoint to a HuggingFace model directory.

    Runs ``llm.export_ckpt`` with the ``"hf"`` target and the
    ``PuzzletronLlamaNemotronModel`` target model name, then calls
    ``copy_deci_lm_hf_code`` on the exported path.

    Args:
        nemo_model_path_local: Path to the input NeMo model file (.nemo).
        output_path_hf_local: Path where the converted HuggingFace model will be saved.
        overwrite: Whether to overwrite an existing output directory.

    Returns:
        The path returned by ``llm.export_ckpt``.
    """
    # NOTE: API call to export_ckpt is here: https://github.com/NVIDIA-NeMo/NeMo/blob/main/nemo/collections/llm/api.py#L987
    call_summary = (
        f"calling export_ckpt with path: {nemo_model_path_local}, "
        f"target: hf, output_path: {output_path_hf_local}, "
        f"target_model_name: PuzzletronLlamaNemotronModel, "
        f"overwrite: {overwrite}"
    )
    print(call_summary)

    destination = Path(output_path_hf_local)
    exported_path = llm.export_ckpt(
        path=nemo_model_path_local,
        target="hf",
        output_path=destination,
        target_model_name="PuzzletronLlamaNemotronModel",
        overwrite=overwrite,
    )

    # Presumably adds the DeciLM HuggingFace modeling code next to the exported
    # weights -- confirm against copy_deci_lm_hf_code.
    copy_deci_lm_hf_code(exported_path)

    print(f"Model saved to {exported_path}")
    return exported_path
|
|
||||||||||||||
|
|
||||||||||||||
def main() -> None:
    """Command-line entry point: convert a NeMo checkpoint to HuggingFace format.

    Parses ``--input-ckpt-path``/``-i``, ``--output-ckpt-path``/``-o`` and
    ``--overwrite``, validates the input path, makes sure the parent directory
    of the output path exists, and then calls ``convert_model``.

    Raises:
        FileNotFoundError: If the input checkpoint path does not exist.
    """
    parser = argparse.ArgumentParser(description="Convert NeMo model to HuggingFace format")
    parser.add_argument(
        "--input-ckpt-path",
        "-i",
        type=str,
        required=True,
        help="Path to the input NeMo model checkpoint",
    )
    parser.add_argument(
        "--output-ckpt-path",
        "-o",
        type=str,
        required=True,
        help="Path where the converted Puzzletron HuggingFace model will be saved",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        default=False,
        help="Whether to overwrite existing output directory (default: False)",
    )

    args = parser.parse_args()

    # Validate input path
    if not os.path.exists(args.input_ckpt_path):
        raise FileNotFoundError(f"Input model path does not exist: {args.input_ckpt_path}")

    # Create the parent directory of the output path if it doesn't exist.
    # Edge case: when the output path has no directory component (e.g. "out"),
    # os.path.dirname returns "" and os.makedirs("") raises FileNotFoundError,
    # so only create the directory when there is one.
    output_dir = os.path.dirname(args.output_ckpt_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"Converting model from {args.input_ckpt_path} to {args.output_ckpt_path}")
    convert_model(args.input_ckpt_path, args.output_ckpt_path, args.overwrite)
|
|
||||||||||||||
|
|
||||||||||||||
| if __name__ == "__main__": | ||||||||||||||
| main() | ||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| lm-eval==0.4.9 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would move it to modelopt/torch/puzzletron/..., and the same for convert_nemo_to_hf.
Examples should not contain the logic; they should be just examples.
Similarly, example/puzzletron/main.py should go to modelopt/torch/puzzletron/...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are command-line scripts, which we should keep in the examples folder just like all other modelopt examples. The ModelOpt installation would be somewhere in /usr/local/..., and we don't want users to run scripts from there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, the more things we add to the modelopt library, the more dependencies modelopt will require. Keeping example scripts separate means we can keep modelopt dependencies leaner and move extra dependencies to examples/<example_name>/requirements.txt.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer we keep it in examples; the scripts rely on the nemo dependency too.