Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,14 @@ run_het_dblp_sup_gs_e2e_test:
--test_spec_uri="tests/e2e_tests/e2e_tests.yaml" \
--test_names="het_dblp_sup_gs_test"

# Run only the homogeneous CORA supervised node classification e2e test
# (`hom_cora_snc_test` from tests/e2e_tests/e2e_tests.yaml).
# - `compiled_pipeline_path` is a target-specific variable taken from
#   GIGL_E2E_TEST_COMPILED_PIPELINE_PATH.
# - `compile_gigl_kubeflow_pipeline` is a prerequisite and is built first.
run_hom_cora_snc_e2e_test: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_hom_cora_snc_e2e_test: compile_gigl_kubeflow_pipeline
run_hom_cora_snc_e2e_test:
	uv run python tests/e2e_tests/e2e_test.py \
		--compiled_pipeline_path=$(compiled_pipeline_path) \
		--test_spec_uri="tests/e2e_tests/e2e_tests.yaml" \
		--test_names="hom_cora_snc_test"

run_all_e2e_tests: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_all_e2e_tests: compile_gigl_kubeflow_pipeline
run_all_e2e_tests:
Expand Down
18 changes: 18 additions & 0 deletions examples/node_classification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Examples for Supervised Node Classification on Homogeneous Graphs

## Homogeneous (CORA)

We use the CORA dataset as an example for supervised node classification on a homogeneous graph.

[homogeneous_training.py](./homogeneous_training.py) and [homogeneous_inference.py](./homogeneous_inference.py) are
example training and inference loops for the CORA dataset. CORA is often called the MNIST of graph models and is
available via the PyG `Planetoid`
[dataset](https://pytorch-geometric.readthedocs.io/en/stable/generated/torch_geometric.datasets.Planetoid.html).

To run the example end-to-end, use the following Makefile target:

```bash
make run_hom_cora_snc_e2e_test
```

The pipeline will run each component end-to-end: `config_populator` → `data_preprocessor` → `trainer` → `inferencer`,
exporting the per-anchor predicted class label (an integer in `[0, 7)` cast to `FLOAT64`) to a BigQuery table referenced
by `InferenceAssets.get_enumerated_predictions_table_path(...)`.
1 change: 1 addition & 0 deletions examples/node_classification/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Node Classification Examples"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# This config is used to run homogeneous CORA supervised node classification training
# and inference using in-memory GiGL SGS. Run via `make run_hom_cora_snc_e2e_test`.
graphMetadata:
  # Single node type (`paper`) with a single `paper -cites-> paper` edge type.
  edgeTypes:
    - dstNodeType: paper
      relation: cites
      srcNodeType: paper
  nodeTypes:
    - paper
taskMetadata:
  nodeBasedTaskMetadata:
    supervisionNodeTypes:
      - paper
datasetConfig:
  dataPreprocessorConfig:
    dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets
    dataPreprocessorArgs:
      mocked_dataset_name: 'cora_homogeneous_supervised_node_classification_edge_features'
trainerConfig:
  # All trainer args are passed as quoted strings.
  trainerArgs:
    log_every_n_batch: "25"
    num_neighbors: "[10, 10]"
    num_classes: "7"
    train_batch_size: "16"
    num_max_train_batches: "200"
    num_val_batches: "20"
    val_every_n_batch: "50"
  command: python -m examples.node_classification.homogeneous_training
inferencerConfig:
  # Kept in sync with the matching `trainerArgs` values above.
  inferencerArgs:
    log_every_n_batch: "25"
    num_neighbors: "[10, 10]"
    num_classes: "7"
  # NOTE(review): unquoted integer, unlike the string-valued args above —
  # presumably a typed field of the inferencer config; confirm against the schema.
  inferenceBatchSize: 512
  command: python -m examples.node_classification.homogeneous_inference
sharedConfig:
  shouldSkipAutomaticTempAssetCleanup: false
  shouldSkipInference: false
  # Model Evaluation is currently only supported for tabularized SGS GiGL pipelines.
  shouldSkipModelEvaluation: true
featureFlags:
  should_run_glt_backend: 'True'
  data_preprocessor_num_shards: '2'
  # NODE_BASED_TASK tasks unconditionally populate `predictions_path` in the inference metadata
  # (see gigl/src/config_populator/config_populator.py:348-358); the post-processor's
  # unenumerator then expects whichever paths are populated to point at real BQ tables.
  # We disable the embeddings path so the post-processor only unenumerates the predictions
  # table that this example actually writes via `PredictionExporter`.
  should_populate_embeddings_path: 'False'
Loading