diff --git a/.gitignore b/.gitignore
index 56ed1bb..3a30521 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 # misc
+.ruff_cache
 .DS_Store
 .vscode
 
diff --git a/demos/kubeflow-pipelines/README.md b/demos/kubeflow-pipelines/README.md
new file mode 100644
index 0000000..bb49edc
--- /dev/null
+++ b/demos/kubeflow-pipelines/README.md
@@ -0,0 +1,13 @@
+# ML Pipeline Orchestration with Kubeflow Pipelines
+
+[Kubeflow Pipelines (KFP)](https://kubeflow-pipelines.readthedocs.io) is a ...
+
+This demo shows ..
+
+## Demo Objectives
+
+* TODO...
+
+## Running the Demo
+
+This demo is contained...
diff --git a/demos/kubeflow-pipelines/components.py b/demos/kubeflow-pipelines/components.py
new file mode 100644
index 0000000..b64aa60
--- /dev/null
+++ b/demos/kubeflow-pipelines/components.py
@@ -0,0 +1,66 @@
+"""Patterns for developing reusable KFP pipeline components."""
+import shutil
+from pathlib import Path
+from unittest.mock import Mock
+
+import numpy as np
+from kfp import dsl, local
+
+# Docker image reference (``name:tag``) that KFP uses to run the component.
+_BASE_IMAGE = "python:3.10"
+# Resolved relative to this file so the module imports from any working directory.
+_REQUIREMENTS_FILE = Path(__file__).parent / "requirements.txt"
+# Blank lines and comments are not valid pip requirement specifiers; drop them.
+_REQUIREMENTS = [
+    line.strip()
+    for line in _REQUIREMENTS_FILE.read_text().splitlines()
+    if line.strip() and not line.strip().startswith("#")
+]
+
+
+@dsl.component(base_image=_BASE_IMAGE, packages_to_install=_REQUIREMENTS)
+def make_data(n_rows: int, n_cols: int, data: dsl.Output[dsl.Dataset]) -> None:
+    """Generate a synthetic dataset and save it as a KFP output artifact.
+
+    Args:
+        n_rows: Number of rows in the generated array.
+        n_cols: Number of columns in the generated array.
+        data: Output artifact; the array is written to ``data.path`` in NumPy
+            ``.npy`` format (``numpy.save`` appends ``.npy`` if it is missing).
+    """
+    # Imports stay inside the function: KFP lightweight components must be
+    # self-contained, so module-level imports are not available at run time.
+    from numpy import save
+    from numpy.random import default_rng
+
+    rng = default_rng(42)  # fixed seed keeps the generated data reproducible
+    data_arr = rng.standard_normal((n_rows, n_cols))
+    save(data.path, data_arr)
+
+
+def test_make_data_component() -> None:
+    """Unit-test the component's Python function without the KFP runtime."""
+    output_dataset_file = "foo.npy"
+    # Stand-in for the output artifact; the component only reads ``.path``.
+    mock_dataset = Mock()
+    mock_dataset.path = output_dataset_file
+    try:
+        make_data.execute(n_rows=3, n_cols=2, data=mock_dataset)
+        output_dataset = np.load(output_dataset_file)
+        assert output_dataset.shape == (3, 2)
+    finally:
+        # No ``except`` clause: pytest should report the real error and traceback.
+        Path(output_dataset_file).unlink(missing_ok=True)
+
+
+def test_make_data_component_integration() -> None:
+    """Run the component end-to-end with KFP's local subprocess runner."""
+    kfp_root_dir = "./kfp_outputs"
+    local.init(runner=local.SubprocessRunner(use_venv=True), pipeline_root=kfp_root_dir)
+    try:
+        task = make_data(n_rows=3, n_cols=2)
+        # ``numpy.save`` appends ".npy" when the path lacks it, hence the suffix.
+        output_dataset = np.load(f"{task.outputs['data'].path}.npy")
+        assert output_dataset.shape == (3, 2)
+    finally:
+        shutil.rmtree(kfp_root_dir, ignore_errors=True)
diff --git a/demos/kubeflow-pipelines/docs/demo_requirements.md b/demos/kubeflow-pipelines/docs/demo_requirements.md
new file mode 100644
index 0000000..266bacd
--- /dev/null
+++ b/demos/kubeflow-pipelines/docs/demo_requirements.md
@@ -0,0 +1,7 @@
+# Demo Requirements
+
+This demo depends on the following Python packages:
+
+```text title="demos/kubeflow-pipelines/requirements.txt"
+--8<-- "demos/kubeflow-pipelines/requirements.txt"
+```
diff --git a/demos/kubeflow-pipelines/requirements.txt b/demos/kubeflow-pipelines/requirements.txt
new file mode 100644
index 0000000..5c5222b
--- /dev/null
+++ b/demos/kubeflow-pipelines/requirements.txt
@@ -0,0 +1,3 @@
+kfp==2.5.*
+numpy==1.26.*
+pytest==8.0.*