Snapchat · kmontemayor2-sc · May 15, 2026 · May 14, 2026 · May 15, 2026 · May 15, 2026
@@ -278,6 +278,14 @@ run_het_dblp_sup_gs_e2e_test:
 		--test_spec_uri="tests/e2e_tests/e2e_tests.yaml" \
 		--test_names="het_dblp_sup_gs_test"
 
+# Runs tests/e2e_tests/e2e_test.py for the "hom_cora_snc_test" entry of tests/e2e_tests/e2e_tests.yaml.
+# Depends on compile_gigl_kubeflow_pipeline; the compiled pipeline path passed to the script
+# is taken from the GIGL_E2E_TEST_COMPILED_PIPELINE_PATH variable.
+run_hom_cora_snc_e2e_test: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
+run_hom_cora_snc_e2e_test: compile_gigl_kubeflow_pipeline
+run_hom_cora_snc_e2e_test:
+	uv run python tests/e2e_tests/e2e_test.py \
+		--compiled_pipeline_path=$(compiled_pipeline_path) \
+		--test_spec_uri="tests/e2e_tests/e2e_tests.yaml" \
+		--test_names="hom_cora_snc_test"
+
 run_all_e2e_tests: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
 run_all_e2e_tests: compile_gigl_kubeflow_pipeline
 run_all_e2e_tests:

@@ -0,0 +1,18 @@
+# Examples for Supervised Node Classification on Homogeneous Graphs
+
+## Homogeneous (CORA)
+
+We use the CORA dataset as an example for supervised node classification on a homogeneous graph.
+
+[homogeneous_training.py](./homogeneous_training.py) and [homogeneous_inference.py](./homogeneous_inference.py) are
+example training and inference loops for the CORA dataset (the "MNIST of graph models"), which is available via the PyG
+`Planetoid`
+[dataset](https://pytorch-geometric.readthedocs.io/en/stable/generated/torch_geometric.datasets.Planetoid.html).
+
+```bash
+make run_hom_cora_snc_e2e_test
+```
+
+This runs each pipeline component end-to-end (`config_populator` → `data_preprocessor` → `trainer` → `inferencer`) and
+exports the per-anchor predicted class label (an integer in `[0, 7)` cast to `FLOAT64`) to the BigQuery table referenced
+by `InferenceAssets.get_enumerated_predictions_table_path(...)`.
@@ -0,0 +1 @@
+"""Node Classification Examples"""
@@ -0,0 +1,49 @@
+# This config is used to run homogeneous CORA supervised node classification training
+# and inference using in-memory GiGL SGS. Run via `make run_hom_cora_snc_e2e_test`.
+# Graph schema: a single `paper` node type with one `paper -cites-> paper` edge type.
+graphMetadata:
+  edgeTypes:
+  - dstNodeType: paper
+    relation: cites
+    srcNodeType: paper
+  nodeTypes:
+  - paper
+# Node-based task supervised on `paper` nodes.
+taskMetadata:
+  nodeBasedTaskMetadata:
+    supervisionNodeTypes:
+    - paper
+datasetConfig:
+  dataPreprocessorConfig:
+    dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets
+    dataPreprocessorArgs:
+      mocked_dataset_name: 'cora_homogeneous_supervised_node_classification_edge_features'
+# Arguments for the training command below; all values are given as quoted strings.
+trainerConfig:
+  trainerArgs:
+    log_every_n_batch: "25"
+    num_neighbors: "[10, 10]"
+    num_classes: "7"
+    train_batch_size: "16"
+    num_max_train_batches: "200"
+    num_val_batches: "20"
+    val_every_n_batch: "50"
+  command: python -m examples.node_classification.homogeneous_training
+# Arguments for the inference command below; `num_neighbors` and `num_classes` match the trainer's.
+inferencerConfig:
+  inferencerArgs:
+    log_every_n_batch: "25"
+    num_neighbors: "[10, 10]"
+    num_classes: "7"
+  inferenceBatchSize: 512
+  command: python -m examples.node_classification.homogeneous_inference
+sharedConfig:
+  shouldSkipAutomaticTempAssetCleanup: false
+  shouldSkipInference: false
+  # Model Evaluation is currently only supported for tabularized SGS GiGL pipelines.
+  shouldSkipModelEvaluation: true
+featureFlags:
+  should_run_glt_backend: 'True'
+  data_preprocessor_num_shards: '2'
+  # NODE_BASED_TASK tasks unconditionally populate `predictions_path` in the inference metadata
+  # (see gigl/src/config_populator/config_populator.py:348-358); the post-processor's
+  # unenumerator then expects whichever paths are populated to point at real BQ tables.
+  # We disable the embeddings path so the post-processor only unenumerates the predictions
+  # table that this example actually writes via `PredictionExporter`.
+  should_populate_embeddings_path: 'False'