Setup e2e integration test for graph store homogeneous inference #433
Merged
Changes from all commits (19 commits):
4233f92  Setup DistNeighborloader for graph store sampling (kmonte)
fa0bd9a  cleanup (kmonte)
1853bc1  setup e2e gs hom inference (kmonte)
1bccd1a  only gs test (kmonte)
3678d70  fix path (kmonte)
bcb93e6  logs (kmonte)
359398f  other test (kmonte)
c804750  fixes (kmonte)
ce30f6b  fix (kmonte)
76c4865  update comments (kmonte)
3b39d36  Merge branch 'main' into kmonte/gs-homo-e2e-inf (kmonte)
fdd38a4  fixes (kmonte)
42554a4  Merge branch 'main' into kmonte/gs-homo-e2e-inf (kmonte)
d5edc61  address comments (kmonte)
e9f0cfa  use cluster info (kmonte)
db29e74  more pg (kmonte)
56d416f  comments (kmonte)
7173b47  don't need barrier (kmonte)
6e3fa98  Merge branch 'main' into kmonte/gs-homo-e2e-inf (kmontemayor2-sc)
New file (+45 lines):

```yaml
# Diffs from e2e_glt_resource_config.yaml
# - Swap vertex_ai_inferencer_config for vertex_ai_graph_store_inferencer_config
shared_resource_config:
  resource_labels:
    cost_resource_group_tag: dev_experiments_COMPONENT
    cost_resource_group: gigl_platform
  common_compute_config:
    project: "external-snap-ci-github-gigl"
    region: "us-central1"
    temp_assets_bucket: "gs://gigl-cicd-temp"
    temp_regional_assets_bucket: "gs://gigl-cicd-temp"
    perm_assets_bucket: "gs://gigl-cicd-perm"
    temp_assets_bq_dataset_name: "gigl_temp_assets"
    embedding_bq_dataset_name: "gigl_embeddings"
    gcp_service_account_email: "untrusted-external-github-gigl@external-snap-ci-github-gigl.iam.gserviceaccount.com"
    dataflow_runner: "DataflowRunner"
preprocessor_config:
  edge_preprocessor_config:
    num_workers: 1
    max_num_workers: 128
    machine_type: "n2d-highmem-32"
    disk_size_gb: 300
  node_preprocessor_config:
    num_workers: 1
    max_num_workers: 128
    machine_type: "n2d-highmem-64"
    disk_size_gb: 300
trainer_resource_config:
  vertex_ai_trainer_config:
    machine_type: n1-highmem-32
    gpu_type: NVIDIA_TESLA_T4
    gpu_limit: 2
    num_replicas: 2
inferencer_resource_config:
  vertex_ai_graph_store_inferencer_config:
    graph_store_pool:
      machine_type: n2-highmem-32
      gpu_type: ACCELERATOR_TYPE_UNSPECIFIED
      gpu_limit: 0
      num_replicas: 2
    compute_pool:
      machine_type: n1-standard-16
      gpu_type: NVIDIA_TESLA_T4
      gpu_limit: 2
      num_replicas: 2
```
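In this config, inference machines are split into two Vertex AI pools: a CPU-only `graph_store_pool` (`gpu_limit: 0`) that serves the partitioned graph, and a GPU `compute_pool` that runs the model. Below is a minimal sketch of reading those pools from the YAML above, assuming the keys are laid out as shown; `load_inference_pools` and the file name are hypothetical and not part of GiGL's API:

```python
# Hypothetical sketch: read the resource config above and list the two worker
# pools used for graph-store inference. Keys follow the YAML shown above.
import yaml


def load_inference_pools(path: str) -> dict:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    gs_cfg = cfg["inferencer_resource_config"]["vertex_ai_graph_store_inferencer_config"]
    pools = {}
    for name in ("graph_store_pool", "compute_pool"):
        pool = gs_cfg[name]
        pools[name] = {
            "machine_type": pool["machine_type"],
            # gpu_limit == 0 marks the CPU-only graph store servers.
            "accelerators": None if pool["gpu_limit"] == 0 else (pool["gpu_type"], pool["gpu_limit"]),
            "replicas": pool["num_replicas"],
        }
    return pools


if __name__ == "__main__":
    print(load_inference_pools("resource_config.yaml"))  # hypothetical file name
```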
Empty file.
examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml (45 additions, 0 deletions):

```yaml
# This config is used to run homogeneous CORA supervised training and inference using in memory GiGL SGS using the Graph Store mode.
# This can be run with `make run_hom_cora_sup_gs_test`.
# Diffs from ../configs/e2e_hom_cora_sup_task_config.yaml:
# - None (currently) - we detect that "Graph Store" mode should be employed from the resource config
graphMetadata:
  edgeTypes:
    - dstNodeType: paper
      relation: cites
      srcNodeType: paper
  nodeTypes:
    - paper
datasetConfig:
  dataPreprocessorConfig:
    dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets
    dataPreprocessorArgs:
      # This argument is specific for the `PassthroughPreprocessorConfigForMockedAssets` preprocessor to indicate which dataset we should be using
      mocked_dataset_name: 'cora_homogeneous_node_anchor_edge_features_user_defined_labels'
# TODO(kmonte): Add GS trainer
trainerConfig:
  trainerArgs:
    # Example argument to trainer
    log_every_n_batch: "50" # Frequency in which we log batch information
    num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
  command: python -m examples.link_prediction.homogeneous_training
# TODO(kmonte): Move to user-defined server code
inferencerConfig:
  inferencerArgs:
    # Example argument to inferencer
    log_every_n_batch: "50" # Frequency in which we log batch information
    num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
  inferenceBatchSize: 512
  command: python -m examples.link_prediction.graph_store.homogeneous_inference
sharedConfig:
  shouldSkipInference: false
  # Model Evaluation is currently only supported for tabularized SGS GiGL pipelines. This will soon be added for in-mem SGS GiGL pipelines.
  shouldSkipModelEvaluation: true
taskMetadata:
  nodeAnchorBasedLinkPredictionTaskMetadata:
    supervisionEdgeTypes:
      - dstNodeType: paper
        relation: cites
        srcNodeType: paper
featureFlags:
  should_run_glt_backend: 'True'
  data_preprocessor_num_shards: '2'
```
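The `num_neighbors` argument above is passed as the string representation of a list ("[10, 10]" means two hops with a fanout of 10 at each hop). Below is a minimal sketch of how an entrypoint could turn that string into a per-hop fanout list; `parse_num_neighbors` is a hypothetical helper, not GiGL's actual parsing code:

```python
# Hypothetical sketch: parse the num_neighbors trainer/inferencer argument
# from the task config above into a list of per-hop fanouts.
import ast
from typing import List


def parse_num_neighbors(raw: str) -> List[int]:
    value = ast.literal_eval(raw)  # "[10, 10]" -> [10, 10]
    if not (isinstance(value, list) and all(isinstance(v, int) for v in value)):
        raise ValueError(f"Expected a list of ints for num_neighbors, got {raw!r}")
    return value


assert parse_num_neighbors("[10, 10]") == [10, 10]  # 2 hops, fanout 10 per hop
```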
examples/link_prediction/graph_store/configs/example_resource_config.yaml (67 additions, 0 deletions):

```yaml
# Example resource config for graph store mode
# Diffs from ../configs/example_resource_config.yaml:
# - Swap vertex_ai_inferencer_config for vertex_ai_graph_store_inferencer_config
# You should swap out the following fields with your own resources:
# - project: "USER_PROVIDED_PROJECT"
# - temp_assets_bucket: "gs://USER_PROVIDED_TEMP_ASSETS_BUCKET"
# - temp_regional_assets_bucket: "gs://USER_PROVIDED_TEMP_ASSETS_BUCKET"
# - perm_assets_bucket: "gs://USER_PROVIDED_PERM_ASSETS_BUCKET"
# - gcp_service_account_email: "USER_PROVIDED_SA@USER_PROVIDED_PROJECT.iam.gserviceaccount.com"
shared_resource_config:
  resource_labels:
    # These are compute labels that we will try to attach to the resources created by GiGL components.
    # More information: https://cloud.google.com/compute/docs/labeling-resources.
    # These can be mostly used to get finer grained cost reporting through GCP billing on individual component
    # and pipeline costs.

    # If COMPONENT is provided in cost_resource_group_tag, it will automatically be replaced with one of
    # {pre|sgs|spl|tra|inf|pos} standing for: {Preprocessor | Subgraph Sampler | Split Generator | Trainer
    # | Inference | Post Processor} so we can get more accurate cost measurements of each component.
    # See implementation:
    # `python/gigl/src/common/types/pb_wrappers/gigl_resource_config.py#GiglResourceConfigWrapper.get_resource_labels`

    cost_resource_group_tag: dev_experiments_COMPONENT
    cost_resource_group: gigl_platform
  common_compute_config:
    project: "USER_PROVIDED_PROJECT"
    region: "us-central1"
    # We recommend using the same bucket for temp_assets_bucket and temp_regional_assets_bucket
    # These fields will get combined into one in the future. Note: Usually storage for regional buckets is cheaper,
    # thus that is recommended.
    temp_assets_bucket: "gs://USER_PROVIDED_TEMP_ASSETS_BUCKET"
    temp_regional_assets_bucket: "gs://USER_PROVIDED_TEMP_ASSETS_BUCKET"
    perm_assets_bucket: "gs://USER_PROVIDED_PERM_ASSETS_BUCKET"
    temp_assets_bq_dataset_name: "gigl_temp_assets"
    embedding_bq_dataset_name: "gigl_embeddings"
    gcp_service_account_email: "USER_PROVIDED_SA@USER_PROVIDED_PROJECT.iam.gserviceaccount.com"
    dataflow_runner: "DataflowRunner"
preprocessor_config:
  edge_preprocessor_config:
    num_workers: 1
    max_num_workers: 4
    machine_type: "n2-standard-16"
    disk_size_gb: 300
  node_preprocessor_config:
    num_workers: 1
    max_num_workers: 4
    machine_type: "n2-standard-16"
    disk_size_gb: 300
# TODO(kmonte): Update
trainer_resource_config:
  vertex_ai_trainer_config:
    machine_type: n1-standard-16
    gpu_type: NVIDIA_TESLA_T4
    gpu_limit: 2
    num_replicas: 2
inferencer_resource_config:
  vertex_ai_graph_store_inferencer_config:
    graph_store_pool:
      machine_type: n2-highmem-32
      gpu_type: ACCELERATOR_TYPE_UNSPECIFIED
      gpu_limit: 0
      num_replicas: 2
    compute_pool:
      machine_type: n1-standard-16
      gpu_type: NVIDIA_TESLA_T4
      gpu_limit: 2
      num_replicas: 2
```
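The comments in this file describe how the COMPONENT token in `cost_resource_group_tag` is rewritten per component for cost reporting. Below is a minimal sketch of that substitution, assuming the abbreviations listed in the comment; the component names used as dictionary keys are illustrative only, and the real mapping lives in `GiglResourceConfigWrapper.get_resource_labels`:

```python
# Hypothetical sketch of the COMPONENT substitution described above: the literal
# token "COMPONENT" in cost_resource_group_tag is replaced with a short
# per-component code so GCP billing can attribute cost to each GiGL component.
COMPONENT_CODES = {
    "preprocessor": "pre",
    "subgraph_sampler": "sgs",
    "split_generator": "spl",
    "trainer": "tra",
    "inferencer": "inf",
    "post_processor": "pos",
}


def resolve_cost_tag(tag_template: str, component: str) -> str:
    return tag_template.replace("COMPONENT", COMPONENT_CODES[component])


assert resolve_cost_tag("dev_experiments_COMPONENT", "trainer") == "dev_experiments_tra"
```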