geometric-intelligence · johmathe · Jan 29, 2026 · Jan 29, 2026
diff --git a/.gitignore b/.gitignore
@@ -192,3 +192,9 @@ logs/
 *.parquet
 datasets/
 run_data/
+
+# Anonymization: exclude files with identifying info for ICML submission
+search_results/
+outputs/
+wandb_analysis_report.md
+.cursor/
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025 geometric-intelligence
+Copyright (c) 2025 Anonymous
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -1,7 +1,3 @@
-[![Code Quality](https://github.com/geometric-intelligence/bgbench/actions/workflows/code-quality-main.yaml/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/code-quality-main.yaml)
-[![Dependencies](https://github.com/geometric-intelligence/bgbench/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/dependabot/dependabot-updates)
-[![Tests](https://github.com/geometric-intelligence/bgbench/actions/workflows/test.yml/badge.svg)](https://github.com/geometric-intelligence/bgbench/actions/workflows/test.yml)
-
 # Big Graph Bench (BGBench)
 
 A comprehensive benchmarking framework for Graph Neural Networks (GNNs) on omics datasets for classification tasks. This repository provides standardized datasets, preprocessing pipelines, and evaluation metrics to enable fair comparison of different GNN architectures on biological data.
@@ -24,7 +20,7 @@ The easiest way to set up BGBench is using the provided environment setup script
 
 ```bash
 # Clone the repository
-git clone git@github.com:geometric-intelligence/bgbench.git
+git clone <anonymous-repo>
 cd bgbench
 
 # Run the automated setup script
@@ -44,7 +40,7 @@ If you prefer manual setup:
 
 ```bash
 # Clone the repository
-git clone git@github.com:geometric-intelligence/bgbench.git
+git clone <anonymous-repo>
 cd bgbench
 
 # Create conda environment
@@ -91,7 +87,7 @@ BGBench includes three curated omics datasets for graph-based classification:
 
 ### Dataset Storage and Access
 
-All datasets are stored on Hugging Face Hub at `geometric-intelligence/bgbench` and automatically downloaded when needed. The datasets are preprocessed and stored in Parquet format for efficient loading.
+All datasets are stored on Hugging Face Hub at `<anonymous>/bgbench` and automatically downloaded when needed. The datasets are preprocessed and stored in Parquet format for efficient loading.
 
 ## Dataset Preprocessing
 
@@ -392,9 +388,9 @@ If you use BGBench in your research, please cite:
 ```bibtex
 @software{bgbench2024,
   title={Big Graph Bench: A Benchmarking Framework for Graph Neural Networks on Omics Data},
-  author={Geometric Intelligence Team},
+  author={Anonymous Authors},
   year={2024},
-  url={https://github.com/geometric-intelligence/bgbench}
+  url={<anonymous-repo>}
 }
 ```
 

diff --git a/notebooks/datasets.ipynb b/notebooks/datasets.ipynb
@@ -17,7 +17,7 @@
     "import pandas as pd\n",
     "import seaborn as sns\n",
     "\n",
-    "sys.path.append(\"/home/johmathe/bgbench\")\n",
+    "sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(\".\"))))\n",
     "os.environ[\"PYTHONPATH\"] = os.pathsep.join(sys.path)\n",
     "from src.data import hf_datamodule\n"
    ]

diff --git a/ogbench/baseline.py b/ogbench/baseline.py
@@ -14,7 +14,6 @@
 import pandas as pd
 import rootutils
 import seaborn as sns
-import wandb
 from huggingface_hub import hf_hub_download
 from omegaconf import DictConfig, OmegaConf
 from sklearn.decomposition import PCA
@@ -35,6 +34,8 @@
 from sklearn.pipeline import Pipeline
 from sklearn.utils import shuffle
 
+import wandb
+
 rootutils.setup_root(__file__, indicator='.project-root', pythonpath=True)
 
 # Set matplotlib style
@@ -171,7 +172,7 @@ def load_metadata(data_name: str, cfg: DictConfig) -> dict[str, Any] | None:
     # Download from HuggingFace
     try:
         logger.info('Downloading metadata from HuggingFace...')
-        hf_repo_id = 'geometric-intelligence/bgbench'
+        hf_repo_id = '<anonymous>/bgbench'
         revision = cfg.dataset.loader.parameters.get('revision', 'e1631e8')
 
         metadata_file = hf_hub_download(  # nosec
@@ -248,7 +249,7 @@ def load_and_prepare_data(cfg: DictConfig) -> DatasetContainer:
         # Download from HuggingFace
         logger.info('Downloading from HuggingFace...')
 
-        hf_repo_id = 'geometric-intelligence/bgbench'
+        hf_repo_id = '<anonymous>/bgbench'
         revision = cfg.dataset.loader.parameters.get('revision', 'e1631e8')
 
         data_file = hf_hub_download(  # nosec

diff --git a/pyproject.toml b/pyproject.toml
@@ -94,9 +94,8 @@ all = ["ogbench[dev, doc]"]
 [project.scripts]
 ogbench-train = "ogbench.run:main"
 
-[project.urls]
-homepage="https://geometric-intelligence.github.io/bgbench/index.html"
-repository="https://github.com/geometric-intelligence/bgbench"
+# [project.urls]
+# homepage and repository URLs removed for anonymous review
 
 [tool.ruff]
 line-length = 99

diff --git a/scripts/export_wandb.py b/scripts/export_wandb.py
@@ -2,6 +2,7 @@
 from typing import Any
 
 import pandas as pd
+
 import wandb
 
 
@@ -18,7 +19,7 @@ def flatten_dict(d: dict[str, Any], parent_key: str = '', sep: str = '_') -> dic
 
 def main() -> None:
     api = wandb.Api()
-    runs = api.runs('johmathe/biggraphbench')
+    runs = api.runs('<anonymous>/biggraphbench')
 
     summary_list: list[dict[str, Any]] = []
     config_list: list[dict[str, Any]] = []

diff --git a/scripts/plot_adjacency_threshold_analysis.py b/scripts/plot_adjacency_threshold_analysis.py
@@ -49,7 +49,7 @@
     },
 }
 
-HF_REPO_ID = 'geometric-intelligence/bgbench'
+HF_REPO_ID = '<anonymous>/bgbench'
 
 
 def load_and_preprocess_dataset(

diff --git a/scripts/utils.py b/scripts/utils.py
@@ -79,7 +79,7 @@ def upload_to_huggingface(
     """
     try:
         api = huggingface_hub.HfApi()
-        repo_id = 'geometric-intelligence/bgbench'
+        repo_id = '<anonymous>/bgbench'
 
         # Create repository if it doesn't exist
         try:

diff --git a/tutorials/dataset_stats.ipynb b/tutorials/dataset_stats.ipynb
@@ -83,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "root = \"/home/lcornelis/code/bgbench/run_data/omics/\"\n",
+    "root = \"./run_data/omics/\"\n",
     "name = osp.join(\n",
     "    root,\n",
     "    f\"{dataset.data_name}\",\n",
@@ -119,7 +119,7 @@
     "    Get statistics of the graph.\n",
     "    \"\"\"\n",
     "    # Load the adjacency matrix\n",
-    "    root = \"/home/lcornelis/code/bgbench/run_data/omics/\"\n",
+    "    root = \"./run_data/omics/\"\n",
     "    name = osp.join(\n",
     "        root,\n",
     "        f\"{dataset.data_name}\",\n",

diff --git a/tutorials/dataset_stats_analysis.py b/tutorials/dataset_stats_analysis.py
@@ -39,7 +39,7 @@ def load_dataset(
 
     if node_sample_ratio == 'full':
         dataset = HFOmicsDataset(
-            root='/home/johmathe/bgbench/run_data/omics',
+            root='./run_data/omics',
             data_name=dataset_name,
             method=method,
             adjacency_threshold=adj_thresh,
@@ -49,7 +49,7 @@ def load_dataset(
         )
     else:
         dataset = HFOmicsDataset(
-            root='/home/johmathe/bgbench/run_data/omics',
+            root='./run_data/omics',
             data_name=dataset_name,
             method=method,
             adjacency_threshold=adj_thresh,
@@ -85,7 +85,7 @@ def get_graph_stats(dataset: Any) -> dict[str, float]:
             graph.add_nodes_from(range(num_nodes))
             graph.add_edges_from(edge_list)
         else:
-            root = '/home/johmathe/bgbench/run_data/omics/'
+            root = './run_data/omics/'
             name = osp.join(
                 root,
                 f'{dataset.data_name}',

diff --git a/webapp/README.md b/webapp/README.md
@@ -102,19 +102,20 @@ python precompute_stats.py
 ```
 
 This will:
-1. Download datasets from HuggingFace (`geometric-intelligence/bgbench`)
+
+1. Download datasets from HuggingFace (`<anonymous>/bgbench`)
 2. Compute graph statistics for all 324 combinations:
    - 3 datasets × 6 ratios × 3 methods × 6 thresholds
 3. Save results to `public/data/stats.json`
 
 **Parameters computed:**
 
-| Parameter | Values |
-|-----------|--------|
-| Datasets | `motrpac`, `addneuromed`, `parkinsons` |
-| Node sample ratios | 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 |
-| Selection methods | `variance`, `correlation`, `random` |
-| Adjacency thresholds | 0.02, 0.1, 0.2, 0.3, 0.4, 0.5 |
+| Parameter            | Values                                 |
+| -------------------- | -------------------------------------- |
+| Datasets             | `motrpac`, `addneuromed`, `parkinsons` |
+| Node sample ratios   | 0.5, 0.6, 0.7, 0.8, 0.9, 1.0           |
+| Selection methods    | `variance`, `correlation`, `random`    |
+| Adjacency thresholds | 0.02, 0.1, 0.2, 0.3, 0.4, 0.5          |
 
 **Metrics computed per graph:**
 
@@ -173,12 +174,14 @@ This will:
 ### How to Update
 
 1. **Replace the JSON files** in `public/data/`:
+
    ```bash
    cp /path/to/new/results.json public/data/results.json
    cp /path/to/new/stats.json public/data/stats.json
    ```
 
 2. **Rebuild and deploy**:
+
    ```bash
    make deploy
    ```
@@ -189,6 +192,7 @@ Results key: `{dataset}|{ratio}|{method}|{threshold}|{model}`
 Stats key: `{dataset}|{ratio}|{method}|{threshold}`
 
 Where:
+
 - `dataset`: `motrpac`, `addneuromed`, or `parkinsons`
 - `ratio`: node sample ratio (0.5–1.0)
 - `method`: `variance`, `correlation`, or `random`
diff --git a/scripts/plot_adjacency_threshold_analysis.py b/scripts/plot_adjacency_threshold_analysis.py
@@ -49,7 +49,7 @@
     },
 }
 
-HF_REPO_ID = 'geometric-intelligence/bgbench'
+HF_REPO_ID = '<anonymous>/bgbench'
 
 
 def load_and_preprocess_dataset(