diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index eb06565..394d44b 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -7,4 +7,5 @@
 /.github/ @NVIDIA-NeMo/data_designer_reviewers
 
 # Plugins
+/plugins/data-designer-github/ @eric-tramel
 /plugins/data-designer-template/ @NVIDIA-NeMo/data_designer_reviewers
diff --git a/docs/plugins/data-designer-github/index.md b/docs/plugins/data-designer-github/index.md
new file mode 100644
index 0000000..1243d43
--- /dev/null
+++ b/docs/plugins/data-designer-github/index.md
@@ -0,0 +1,79 @@
+# data-designer-github
+
+`data-designer-github` is a Data Designer seed reader for repository files. It
+turns GitHub repositories or local git repositories into seed rows that carry
+file content, path metadata, repository provenance, and commit identifiers.
+
+Use it when a workflow needs code repository data as the starting point for
+generation, review, transformation, or indexing tasks. The reader is intentionally
+file-oriented: each matching text file becomes one seed row, and downstream Data
+Designer columns decide how to summarize, critique, rewrite, label, or enrich
+that row.
+
+## Installation
+
+```bash
+uv add data-designer data-designer-github
+```
+
+The plugin is discovered through the `data_designer.plugins` entry point once it
+is installed in the same environment as Data Designer.
+
+## Seed source
+
+Use the `github` seed source when the seed dataset should come from one or more
+repositories.
+
+| Field | Required | Description |
+| --- | --- | --- |
+| `path` | No | A local git repository path, or a directory whose immediate children are git repositories. |
+| `repositories` | No | GitHub repositories to clone. Entries may be `owner/name`, `https://github.com/owner/name`, or `https://github.com/owner/name.git`. |
+| `repository_paths` | No | Additional explicit local git repository paths to read. |
+| `ref` | No | Branch, tag, or commit to check out for cloned GitHub repositories. |
+| `clone_depth` | No | Shallow clone depth for GitHub repositories. Defaults to `1`; set to `None` for a full clone. |
+| `clone_timeout_seconds` | No | Timeout for each clone or checkout operation. Defaults to `300`. |
+| `file_pattern` | No | Inherited file glob from Data Designer's filesystem seed source. For example, `*.py`. |
+| `recursive` | No | Whether `file_pattern` is applied recursively. |
+| `include_extensions` | No | File extensions to include after the glob match. Defaults to common code and documentation extensions. Set to `None` to allow every extension. |
+| `include_file_names` | No | Extensionless file names to include, such as `Dockerfile` and `Makefile`. |
+| `exclude_patterns` | No | Relative path glob patterns to skip, including `.git`, cache, build, virtualenv, and dependency directories by default. |
+| `max_file_size_bytes` | No | Maximum file size to hydrate into `content`. Defaults to `1_000_000`. |
+| `encoding` | No | Text encoding used when reading file contents. Defaults to `utf-8`. |
+
+At least one of `path`, `repositories`, or `repository_paths` is required.
+
+## Output columns
+
+| Column | Description |
+| --- | --- |
+| `repo_id` | Repository identifier. GitHub repositories use `owner/name`; local repositories use their GitHub remote when available, otherwise the directory name. |
+| `repo_url` | Remote origin URL when available. |
+| `commit_sha` | Checked-out commit SHA for the repository. |
+| `source_kind` | `github` for cloned repositories, or `git_repository` for local repositories. |
+| `repository_path` | Local path used by the reader. GitHub repositories are cloned into a temporary runtime directory. |
+| `source_path` | Absolute path to the file that produced the seed row. |
+| `relative_path` | File path relative to the repository root. |
+| `file_name` | Basename of the file. |
+| `file_extension` | Lowercase file extension. |
+| `code_lang` | Language hint inferred from the file name or extension. |
+| `size_bytes` | File size at manifest time. |
+| `content_sha256` | SHA-256 hash of the hydrated file bytes. |
+| `content` | Decoded text content. |
+
+## Behavior
+
+When the reader is attached, it resolves local repository roots, clones any
+configured GitHub repositories, records the checked-out commit, and builds a
+manifest of matching files. File content is read during row hydration, so Data
+Designer can batch and sample repository content using the same seed reader
+interfaces as other filesystem-backed datasets.
+
+The plugin reads repository files only. It does not parse code into functions,
+classes, symbols, dependency graphs, or AST nodes. If a workflow needs those
+structures, use this reader to collect stable file-level inputs and add
+downstream columns that perform the language-specific analysis.
+
+The plugin shells out to `git` for repository operations and does not manage
+GitHub API tokens. Public repositories work directly. Private repositories
+require the execution environment's git credential configuration to already have
+access.
diff --git a/docs/plugins/data-designer-github/usage.md b/docs/plugins/data-designer-github/usage.md
new file mode 100644
index 0000000..7f96e8b
--- /dev/null
+++ b/docs/plugins/data-designer-github/usage.md
@@ -0,0 +1,165 @@
+# Usage
+
+This tutorial walks through the common patterns for turning repositories into
+Data Designer seed rows. The examples use the Python builder API, but the same
+configuration fields apply when a workflow is built from serialized config.
+
+## Read a GitHub repository
+
+Start with a small repository and a narrow file pattern. This keeps previews
+fast and makes it clear which rows are entering the workflow.
+
+```python
+from data_designer.config.config_builder import DataDesignerConfigBuilder
+from data_designer.interface.data_designer import DataDesigner
+from data_designer_github.config import GitHubSeedSource
+
+builder = DataDesignerConfigBuilder()
+builder.with_seed_dataset(
+    GitHubSeedSource(
+        repositories=["pallets/markupsafe"],
+        file_pattern="*.py",
+        recursive=True,
+    )
+)
+
+builder.add_column(
+    name="_row_id",
+    column_type="sampler",
+    sampler_type="uuid",
+    params={},
+)
+
+preview = DataDesigner().preview(builder, num_records=5)
+print(preview.dataset[["repo_id", "relative_path", "code_lang", "content"]])
+```
+
+The seed rows contain repository provenance and file text. Downstream columns can
+then ask questions such as "summarize this file", "identify risky APIs", "write
+a short module description", or "extract candidate test scenarios" using the
+`content`, `relative_path`, `code_lang`, and `commit_sha` columns.
+
+## Pin a branch, tag, or commit
+
+Use `ref` when the dataset must be reproducible against a specific branch, tag,
+or commit. Branches and tags are passed to `git clone --branch`; commit SHAs are
+checked out after cloning.
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    ref="v0.5.7",
+    clone_depth=1,
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+For arbitrary commit SHAs, set `clone_depth=None` (a full clone) if the commit
+may not be reachable from the default shallow clone (`clone_depth=1`).
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    ref="0123456789abcdef0123456789abcdef01234567",
+    clone_depth=None,
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+## Read local repositories
+
+Local repositories are useful for private code, local experiments, or a
+checked-out monorepo that already exists on disk.
+
+```python
+source = GitHubSeedSource(
+    repository_paths=[
+        "/workspace/services/api",
+        "/workspace/libraries/shared",
+    ],
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+If `path` points at a git repository, that repository is read. If `path` points
+at a directory whose immediate children are git repositories, each child
+repository is discovered and read.
+
+```python
+source = GitHubSeedSource(
+    path="/workspace/repos",
+    file_pattern="*.ts",
+    recursive=True,
+)
+```
+
+## Control which files become rows
+
+The reader first applies `file_pattern` and `recursive`, then filters by
+extension, file name, exclude pattern, and file size.
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    file_pattern="*",
+    recursive=True,
+    include_extensions=["py", "toml", "md"],
+    include_file_names=["Dockerfile", "Makefile"],
+    exclude_patterns=[
+        ".git/**",
+        "**/__pycache__/**",
+        "**/build/**",
+        "**/dist/**",
+        "docs/generated/**",
+    ],
+    max_file_size_bytes=250_000,
+)
+```
+
+Use `include_extensions=None` for broad repository inventory tasks where the
+glob and exclude patterns should decide the candidate set.
+
+```python
+source = GitHubSeedSource(
+    repositories=["owner/repo"],
+    file_pattern="LICENSE*",
+    recursive=False,
+    include_extensions=None,
+)
+```
+
+## Typical workflows
+
+`data-designer-github` works best as the seed layer for file-level code
+workflows:
+
+- Repository QA: score files for risky dependencies, missing license headers, or
+  stale implementation notes.
+- Documentation generation: turn source files into module summaries, migration
+  notes, or API reference drafts.
+- Test ideation: derive test scenarios from implementation files and route them
+  to a code-generation column.
+- Code search preparation: create embeddings or labels from stable file content
+  and repository metadata.
+- Dataset construction: sample representative code files from several projects
+  while preserving `repo_id`, `relative_path`, and `commit_sha` provenance.
+
+Because the reader emits full file content, prompts should account for file
+length and language. A common pattern is to filter or sample seed rows first,
+then generate focused columns that reference only the metadata and content each
+task needs.
+
+## Operational notes
+
+The plugin requires `git` on `PATH`. GitHub repositories are cloned into a
+temporary runtime directory for the reader attachment and local repositories are
+read in place. Files that exceed `max_file_size_bytes` are skipped before
+hydration. Files that cannot be decoded with `encoding` are skipped with a
+warning rather than producing partial text.
+
+The reader does not call the GitHub API, manage credentials, or expand GitHub
+issues and pull requests. It is scoped to repository file content so workflows
+can compose repository-aware seed data with the rest of Data Designer.
diff --git a/docs/plugins/index.md b/docs/plugins/index.md
index 4e54e2e..d488636 100644
--- a/docs/plugins/index.md
+++ b/docs/plugins/index.md
@@ -5,6 +5,17 @@
 Browse available Data Designer plugins by what they add to your data generation workflow.
 
 <div class="plugin-doc-grid">
+  <a class="plugin-doc-card" href="data-designer-github/" aria-label="Open data-designer-github documentation">
+    <span class="plugin-doc-card__header">
+      <span class="plugin-doc-card__title">data-designer-github</span>
+      <span class="plugin-doc-card__version">v0.1.0</span>
+    </span>
+    <span class="plugin-doc-card__description">GitHub and local git repository seed reader for Data Designer</span>
+    <span class="plugin-doc-card__section">
+      <span class="plugin-doc-card__label">Seed sources</span>
+      <span class="plugin-doc-card__chips"><span class="plugin-doc-chip">github</span></span>
+    </span>
+  </a>
   <a class="plugin-doc-card" href="data-designer-template/" aria-label="Open data-designer-template documentation">
     <span class="plugin-doc-card__header">
       <span class="plugin-doc-card__title">data-designer-template</span>
diff --git a/plugins/data-designer-github/CODEOWNERS b/plugins/data-designer-github/CODEOWNERS
new file mode 100644
index 0000000..e0e141b
--- /dev/null
+++ b/plugins/data-designer-github/CODEOWNERS
@@ -0,0 +1,3 @@
+# Owner(s) of this plugin — used to generate the root CODEOWNERS file.
+# GitHub accepts @username, @org/team, or email format.
+* @eric-tramel
diff --git a/plugins/data-designer-github/README.md b/plugins/data-designer-github/README.md
new file mode 100644
index 0000000..68671e3
--- /dev/null
+++ b/plugins/data-designer-github/README.md
@@ -0,0 +1,51 @@
+# data-designer-github
+
+GitHub and local git repository seed reader for
+[NeMo Data Designer](https://github.com/NVIDIA-NeMo/DataDesigner).
+
+## Installation
+
+```bash
+pip install data-designer-github
+```
+
+## Usage
+
+This plugin provides a `github` seed source. Once installed, the seed reader is
+automatically discovered by Data Designer.
+
+```python
+from data_designer.config.config_builder import DataDesignerConfigBuilder
+from data_designer.interface.data_designer import DataDesigner
+from data_designer_github.config import GitHubSeedSource
+
+builder = DataDesignerConfigBuilder()
+builder.with_seed_dataset(
+    GitHubSeedSource(
+        repositories=["NVIDIA-NeMo/DataDesigner"],
+        file_pattern="*.py",
+        recursive=True,
+    )
+)
+
+preview = DataDesigner().preview(builder, num_records=5)
+print(preview.dataset[["repo_id", "relative_path", "code_lang", "content"]])
+```
+
+The reader can also scan local git repositories:
+
+```python
+builder.with_seed_dataset(
+    GitHubSeedSource(
+        path="/path/to/repos",
+        repository_paths=["/path/to/one/repo"],
+        file_pattern="*.py",
+    )
+)
+```
+
+Seed columns include repository metadata, file paths, language hints, file
+content, and content SHA-256 hashes.
+
+For the full plugin authoring guide, see the
+[main repository docs](https://github.com/NVIDIA-NeMo/DataDesignerPlugins/blob/main/docs/adding-a-plugin.md).
diff --git a/plugins/data-designer-github/docs/index.md b/plugins/data-designer-github/docs/index.md
new file mode 100644
index 0000000..1243d43
--- /dev/null
+++ b/plugins/data-designer-github/docs/index.md
@@ -0,0 +1,79 @@
+# data-designer-github
+
+`data-designer-github` is a Data Designer seed reader for repository files. It
+turns GitHub repositories or local git repositories into seed rows that carry
+file content, path metadata, repository provenance, and commit identifiers.
+
+Use it when a workflow needs code repository data as the starting point for
+generation, review, transformation, or indexing tasks. The reader is intentionally
+file-oriented: each matching text file becomes one seed row, and downstream Data
+Designer columns decide how to summarize, critique, rewrite, label, or enrich
+that row.
+
+## Installation
+
+```bash
+uv add data-designer data-designer-github
+```
+
+The plugin is discovered through the `data_designer.plugins` entry point once it
+is installed in the same environment as Data Designer.
+
+## Seed source
+
+Use the `github` seed source when the seed dataset should come from one or more
+repositories.
+
+| Field | Required | Description |
+| --- | --- | --- |
+| `path` | No | A local git repository path, or a directory whose immediate children are git repositories. |
+| `repositories` | No | GitHub repositories to clone. Entries may be `owner/name`, `https://github.com/owner/name`, or `https://github.com/owner/name.git`. |
+| `repository_paths` | No | Additional explicit local git repository paths to read. |
+| `ref` | No | Branch, tag, or commit to check out for cloned GitHub repositories. |
+| `clone_depth` | No | Shallow clone depth for GitHub repositories. Defaults to `1`; set to `None` for a full clone. |
+| `clone_timeout_seconds` | No | Timeout for each clone or checkout operation. Defaults to `300`. |
+| `file_pattern` | No | Inherited file glob from Data Designer's filesystem seed source. For example, `*.py`. |
+| `recursive` | No | Whether `file_pattern` is applied recursively. |
+| `include_extensions` | No | File extensions to include after the glob match. Defaults to common code and documentation extensions. Set to `None` to allow every extension. |
+| `include_file_names` | No | Extensionless file names to include, such as `Dockerfile` and `Makefile`. |
+| `exclude_patterns` | No | Relative path glob patterns to skip, including `.git`, cache, build, virtualenv, and dependency directories by default. |
+| `max_file_size_bytes` | No | Maximum file size to hydrate into `content`. Defaults to `1_000_000`. |
+| `encoding` | No | Text encoding used when reading file contents. Defaults to `utf-8`. |
+
+At least one of `path`, `repositories`, or `repository_paths` is required.
+
+## Output columns
+
+| Column | Description |
+| --- | --- |
+| `repo_id` | Repository identifier. GitHub repositories use `owner/name`; local repositories use their GitHub remote when available, otherwise the directory name. |
+| `repo_url` | Remote origin URL when available. |
+| `commit_sha` | Checked-out commit SHA for the repository. |
+| `source_kind` | `github` for cloned repositories, or `git_repository` for local repositories. |
+| `repository_path` | Local path used by the reader. GitHub repositories are cloned into a temporary runtime directory. |
+| `source_path` | Absolute path to the file that produced the seed row. |
+| `relative_path` | File path relative to the repository root. |
+| `file_name` | Basename of the file. |
+| `file_extension` | Lowercase file extension. |
+| `code_lang` | Language hint inferred from the file name or extension. |
+| `size_bytes` | File size at manifest time. |
+| `content_sha256` | SHA-256 hash of the hydrated file bytes. |
+| `content` | Decoded text content. |
+
+## Behavior
+
+When the reader is attached, it resolves local repository roots, clones any
+configured GitHub repositories, records the checked-out commit, and builds a
+manifest of matching files. File content is read during row hydration, so Data
+Designer can batch and sample repository content using the same seed reader
+interfaces as other filesystem-backed datasets.
+
+The plugin reads repository files only. It does not parse code into functions,
+classes, symbols, dependency graphs, or AST nodes. If a workflow needs those
+structures, use this reader to collect stable file-level inputs and add
+downstream columns that perform the language-specific analysis.
+
+The plugin shells out to `git` for repository operations and does not manage
+GitHub API tokens. Public repositories work directly. Private repositories
+require the execution environment's git credential configuration to already have
+access.
diff --git a/plugins/data-designer-github/docs/usage.md b/plugins/data-designer-github/docs/usage.md
new file mode 100644
index 0000000..7f96e8b
--- /dev/null
+++ b/plugins/data-designer-github/docs/usage.md
@@ -0,0 +1,165 @@
+# Usage
+
+This tutorial walks through the common patterns for turning repositories into
+Data Designer seed rows. The examples use the Python builder API, but the same
+configuration fields apply when a workflow is built from serialized config.
+
+## Read a GitHub repository
+
+Start with a small repository and a narrow file pattern. This keeps previews
+fast and makes it clear which rows are entering the workflow.
+
+```python
+from data_designer.config.config_builder import DataDesignerConfigBuilder
+from data_designer.interface.data_designer import DataDesigner
+from data_designer_github.config import GitHubSeedSource
+
+builder = DataDesignerConfigBuilder()
+builder.with_seed_dataset(
+    GitHubSeedSource(
+        repositories=["pallets/markupsafe"],
+        file_pattern="*.py",
+        recursive=True,
+    )
+)
+
+builder.add_column(
+    name="_row_id",
+    column_type="sampler",
+    sampler_type="uuid",
+    params={},
+)
+
+preview = DataDesigner().preview(builder, num_records=5)
+print(preview.dataset[["repo_id", "relative_path", "code_lang", "content"]])
+```
+
+The seed rows contain repository provenance and file text. Downstream columns can
+then ask questions such as "summarize this file", "identify risky APIs", "write
+a short module description", or "extract candidate test scenarios" using the
+`content`, `relative_path`, `code_lang`, and `commit_sha` columns.
+
+## Pin a branch, tag, or commit
+
+Use `ref` when the dataset must be reproducible against a specific branch, tag,
+or commit. Branches and tags are passed to `git clone --branch`; commit SHAs are
+checked out after cloning.
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    ref="v0.5.7",
+    clone_depth=1,
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+For arbitrary commit SHAs, set `clone_depth=None` (a full clone) if the commit
+may not be reachable from the default shallow clone (`clone_depth=1`).
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    ref="0123456789abcdef0123456789abcdef01234567",
+    clone_depth=None,
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+## Read local repositories
+
+Local repositories are useful for private code, local experiments, or a
+checked-out monorepo that already exists on disk.
+
+```python
+source = GitHubSeedSource(
+    repository_paths=[
+        "/workspace/services/api",
+        "/workspace/libraries/shared",
+    ],
+    file_pattern="*.py",
+    recursive=True,
+)
+```
+
+If `path` points at a git repository, that repository is read. If `path` points
+at a directory whose immediate children are git repositories, each child
+repository is discovered and read.
+
+```python
+source = GitHubSeedSource(
+    path="/workspace/repos",
+    file_pattern="*.ts",
+    recursive=True,
+)
+```
+
+## Control which files become rows
+
+The reader first applies `file_pattern` and `recursive`, then filters by
+extension, file name, exclude pattern, and file size.
+
+```python
+source = GitHubSeedSource(
+    repositories=["NVIDIA-NeMo/DataDesigner"],
+    file_pattern="*",
+    recursive=True,
+    include_extensions=["py", "toml", "md"],
+    include_file_names=["Dockerfile", "Makefile"],
+    exclude_patterns=[
+        ".git/**",
+        "**/__pycache__/**",
+        "**/build/**",
+        "**/dist/**",
+        "docs/generated/**",
+    ],
+    max_file_size_bytes=250_000,
+)
+```
+
+Use `include_extensions=None` for broad repository inventory tasks where the
+glob and exclude patterns should decide the candidate set.
+
+```python
+source = GitHubSeedSource(
+    repositories=["owner/repo"],
+    file_pattern="LICENSE*",
+    recursive=False,
+    include_extensions=None,
+)
+```
+
+## Typical workflows
+
+`data-designer-github` works best as the seed layer for file-level code
+workflows:
+
+- Repository QA: score files for risky dependencies, missing license headers, or
+  stale implementation notes.
+- Documentation generation: turn source files into module summaries, migration
+  notes, or API reference drafts.
+- Test ideation: derive test scenarios from implementation files and route them
+  to a code-generation column.
+- Code search preparation: create embeddings or labels from stable file content
+  and repository metadata.
+- Dataset construction: sample representative code files from several projects
+  while preserving `repo_id`, `relative_path`, and `commit_sha` provenance.
+
+Because the reader emits full file content, prompts should account for file
+length and language. A common pattern is to filter or sample seed rows first,
+then generate focused columns that reference only the metadata and content each
+task needs.
+
+## Operational notes
+
+The plugin requires `git` on `PATH`. GitHub repositories are cloned into a
+temporary runtime directory for the reader attachment and local repositories are
+read in place. Files that exceed `max_file_size_bytes` are skipped before
+hydration. Files that cannot be decoded with `encoding` are skipped with a
+warning rather than producing partial text.
+
+The reader does not call the GitHub API, manage credentials, or expand GitHub
+issues and pull requests. It is scoped to repository file content so workflows
+can compose repository-aware seed data with the rest of Data Designer.
diff --git a/plugins/data-designer-github/pyproject.toml b/plugins/data-designer-github/pyproject.toml
new file mode 100644
index 0000000..359c6f9
--- /dev/null
+++ b/plugins/data-designer-github/pyproject.toml
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+[project]
+name = "data-designer-github"
+version = "0.1.0"
+description = "GitHub and local git repository seed reader for Data Designer"
+requires-python = ">=3.10"
+dependencies = [
+    "data-designer>=0.5.7",
+]
+license = "Apache-2.0"
+readme = "README.md"
+authors = [
+    {name = "NVIDIA Corporation"},
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Programming Language :: Python :: 3",
+]
+
+[project.entry-points."data_designer.plugins"]
+github = "data_designer_github.plugin:plugin"
+
+[project.urls]
+Repository = "https://github.com/NVIDIA-NeMo/DataDesignerPlugins"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/data_designer_github"]
+
+[tool.ruff]
+extend = "../../pyproject.toml"
diff --git a/plugins/data-designer-github/src/data_designer_github/__init__.py b/plugins/data-designer-github/src/data_designer_github/__init__.py
new file mode 100644
index 0000000..52a7a9d
--- /dev/null
+++ b/plugins/data-designer-github/src/data_designer_github/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/plugins/data-designer-github/src/data_designer_github/config.py b/plugins/data-designer-github/src/data_designer_github/config.py
new file mode 100644
index 0000000..6285de5
--- /dev/null
+++ b/plugins/data-designer-github/src/data_designer_github/config.py
@@ -0,0 +1,217 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import codecs
+from pathlib import Path
+from typing import ClassVar, Literal
+
+from data_designer.config.base import ConfigBase
+from data_designer.config.seed_source import FileSystemSeedSource
+from pydantic import Field, field_validator, model_validator
+from typing_extensions import Self
+
+DEFAULT_CODE_EXTENSIONS = [  # default for GitHubSeedSource.include_extensions (lowercase, dotted)
+    ".bash",
+    ".c",
+    ".cc",
+    ".cfg",
+    ".cpp",
+    ".cs",
+    ".css",
+    ".go",
+    ".h",
+    ".hpp",
+    ".html",
+    ".java",
+    ".js",
+    ".json",
+    ".jsx",
+    ".kt",
+    ".kts",
+    ".lua",
+    ".md",
+    ".php",
+    ".py",
+    ".rb",
+    ".rs",
+    ".scala",
+    ".sh",
+    ".sql",
+    ".swift",
+    ".toml",
+    ".ts",
+    ".tsx",
+    ".yaml",
+    ".yml",
+    ".zsh",
+]
+
+DEFAULT_CODE_FILENAMES = [  # default for GitHubSeedSource.include_file_names
+    "Dockerfile",
+    "Makefile",
+]
+
+DEFAULT_EXCLUDE_PATTERNS = [  # default for GitHubSeedSource.exclude_patterns
+    ".git/*",
+    ".git/**",  # NOTE(review): only .git also has a "/**" variant — confirm the matcher needs it
+    ".mypy_cache/*",
+    ".pytest_cache/*",
+    ".ruff_cache/*",
+    ".tox/*",
+    ".venv/*",
+    "__pycache__/*",
+    "build/*",
+    "dist/*",
+    "node_modules/*",
+    "venv/*",
+    "**/.git/*",  # same directories again with a "**/" prefix — presumably for nested occurrences
+    "**/.git/**",
+    "**/.mypy_cache/*",
+    "**/.pytest_cache/*",
+    "**/.ruff_cache/*",
+    "**/.tox/*",
+    "**/.venv/*",
+    "**/__pycache__/*",
+    "**/build/*",
+    "**/dist/*",
+    "**/node_modules/*",
+    "**/venv/*",
+]
+
+
+class GitHubSeedSource(FileSystemSeedSource, ConfigBase):
+    """Seed source for reading code files from GitHub and local git repositories."""
+
+    seed_type: Literal["github"] = "github"  # fixed type tag for this seed source
+
+    path: str | None = Field(  # optional here; validate_has_repository_source requires one source
+        None,
+        description=(
+            "Optional local git repository path, or a directory whose immediate children are git repositories. "
+            "Relative paths are resolved from the current working directory when the config is loaded."
+        ),
+    )
+    repositories: list[str] = Field(
+        default_factory=list,
+        description=(
+            "GitHub repositories to clone before reading. Each entry may be 'owner/name', "
+            "'https://github.com/owner/name', or 'https://github.com/owner/name.git'."
+        ),
+    )
+    repository_paths: list[str] = Field(
+        default_factory=list,
+        description="Additional local git repository paths to read.",
+    )
+    ref: str | None = Field(
+        None,
+        description="Optional branch, tag, or commit to check out after cloning GitHub repositories.",
+    )
+    clone_depth: int | None = Field(
+        1,
+        ge=1,  # bounds integer values; the description below says null selects a full clone
+        description="Depth for GitHub clones. Set to null for a full clone.",
+    )
+    clone_timeout_seconds: int = Field(
+        300,
+        ge=1,
+        description="Timeout for each git clone or checkout operation.",
+    )
+    include_extensions: list[str] | None = Field(
+        default_factory=lambda: list(DEFAULT_CODE_EXTENSIONS),  # copy; never share the module list
+        description=(
+            "Lowercase file extensions to include. Values may include or omit the leading dot. "
+            "Set to null to include every extension."
+        ),
+    )
+    include_file_names: list[str] = Field(
+        default_factory=lambda: list(DEFAULT_CODE_FILENAMES),  # copy; never share the module list
+        description="Extensionless file names to include, such as Dockerfile or Makefile.",
+    )
+    exclude_patterns: list[str] = Field(
+        default_factory=lambda: list(DEFAULT_EXCLUDE_PATTERNS),  # copy; never share the module list
+        description="Relative path glob patterns to exclude from repository scans.",
+    )
+    max_file_size_bytes: int = Field(
+        1_000_000,
+        ge=1,
+        description="Maximum file size to hydrate into the content column.",
+    )
+    encoding: str = Field(
+        "utf-8",
+        description="Text encoding used when hydrating repository file contents.",
+    )
+
+    _source_fields: ClassVar[tuple[str, ...]] = ("path", "repositories", "repository_paths")
+
+    @model_validator(mode="after")
+    def validate_has_repository_source(self) -> Self:
+        """Ensure the seed source has at least one repository source."""
+        if self.path is None and not self.repositories and not self.repository_paths:
+            fields = ", ".join(self._source_fields)  # field names quoted in the error message
+            raise ValueError(f"At least one of {fields} must be provided.")
+        return self
+
+    @field_validator("encoding", mode="after")
+    @classmethod
+    def validate_encoding(cls, value: str) -> str:
+        """Validate that the configured text encoding exists."""
+        try:
+            codecs.lookup(value)  # raises LookupError for an unknown codec name
+        except LookupError as error:
+            raise ValueError(f"Unknown encoding: {value!r}. Use a valid Python codec name.") from error
+        return value
+
+    @field_validator("include_extensions", mode="after")
+    @classmethod
+    def normalize_include_extensions(cls, value: list[str] | None) -> list[str] | None:
+        """Normalize configured extensions to lowercase dotted values."""
+        if value is None:  # None is passed through unchanged
+            return None
+
+        normalized: list[str] = []
+        for extension in value:
+            stripped = extension.strip().lower()
+            if not stripped:  # whitespace-only entries are rejected
+                raise ValueError("include_extensions cannot contain empty values.")
+            normalized.append(stripped if stripped.startswith(".") else f".{stripped}")
+        return sorted(set(normalized))  # duplicates removed, result in sorted order
+
+    @field_validator("include_file_names", "exclude_patterns", mode="after")
+    @classmethod
+    def validate_non_empty_strings(cls, value: list[str]) -> list[str]:
+        """Validate string list fields do not contain blank entries."""
+        for item in value:
+            if not item.strip():  # whitespace-only entries count as empty
+                raise ValueError("String lists cannot contain empty values.")
+        return value
+
+    @field_validator("repositories", mode="after")
+    @classmethod
+    def validate_repositories(cls, value: list[str]) -> list[str]:
+        """Validate repository specs do not contain blank entries."""
+        for repository in value:
+            if not repository.strip():  # only blankness is checked here, not the spec format
+                raise ValueError("repositories cannot contain empty values.")
+        return value
+
+    @field_validator("repository_paths", mode="after")
+    @classmethod
+    def validate_repository_paths(cls, value: list[str]) -> list[str]:
+        """Validate explicit local repository paths exist."""
+        for repository_path in value:
+            path = Path(repository_path).expanduser().resolve()  # resolved for the check only
+            if not path.is_dir():
+                raise ValueError(f"Repository path {path} is not a directory.")
+        return value  # the configured strings are returned unchanged
+
+    @property
+    def runtime_path(self) -> str:
+        """Return the resolved local scan root after a reader has prepared it."""
+        if self._runtime_path is not None:  # NOTE(review): attribute not declared in this class — confirm origin
+            return self._runtime_path
+        if self.path is None:  # no configured local path to fall back on
+            raise ValueError("GitHubSeedSource.runtime_path is available after the seed reader is attached.")
+        self._runtime_path = str(Path(self.path).expanduser().resolve())
+        return self._runtime_path  # stored by the assignment above for later calls
diff --git a/plugins/data-designer-github/src/data_designer_github/impl.py b/plugins/data-designer-github/src/data_designer_github/impl.py
new file mode 100644
index 0000000..c228b79
--- /dev/null
+++ b/plugins/data-designer-github/src/data_designer_github/impl.py
@@ -0,0 +1,409 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import hashlib
+import logging
+import re
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass
+from fnmatch import fnmatchcase
+from pathlib import Path
+from typing import Any, ClassVar
+from urllib.parse import urlparse
+
+import pandas as pd
+from data_designer.engine.resources.seed_reader import (
+    FileSystemSeedReader,
+    SeedReaderError,
+    SeedReaderFileSystemContext,
+)
+
+from data_designer_github.config import GitHubSeedSource
+
+logger = logging.getLogger(__name__)
+
+
+LANGUAGE_BY_EXTENSION = {  # keyed by lowercased suffix including the dot; see _detect_language for the fallback
+    ".bash": "bash",
+    ".c": "c",
+    ".cc": "cpp",
+    ".cfg": "config",
+    ".cpp": "cpp",
+    ".cs": "csharp",
+    ".css": "css",
+    ".go": "go",
+    ".h": "c",
+    ".hpp": "cpp",
+    ".html": "html",
+    ".java": "java",
+    ".js": "javascript",
+    ".json": "json",
+    ".jsx": "javascript",
+    ".kt": "kotlin",
+    ".kts": "kotlin",
+    ".lua": "lua",
+    ".md": "markdown",
+    ".php": "php",
+    ".py": "python",
+    ".rb": "ruby",
+    ".rs": "rust",
+    ".scala": "scala",
+    ".sh": "shell",
+    ".sql": "sql",
+    ".swift": "swift",
+    ".toml": "toml",
+    ".ts": "typescript",
+    ".tsx": "typescript",
+    ".yaml": "yaml",
+    ".yml": "yaml",
+    ".zsh": "zsh",
+}
+
+LANGUAGE_BY_FILENAME = {  # keyed by exact (case-sensitive) file name; consulted before the extension table
+    "Dockerfile": "dockerfile",
+    "Makefile": "makefile",
+}
+
+
+@dataclass(frozen=True)
+class RepositoryRoot:
+    """Prepared repository root available for manifest building."""
+
+    repo_id: str  # "owner/name" when parsed from a GitHub spec/remote, otherwise the local directory name
+    repo_url: str | None  # clone URL or origin remote URL; None when a local repository has no origin
+    root_path: Path  # directory that is scanned for files
+    source_kind: str  # "github" for clones, "git_repository" for local repositories
+    commit_sha: str | None  # output of `git rev-parse HEAD`; None when that command fails
+
+
+class GitHubSeedReader(FileSystemSeedReader[GitHubSeedSource]):
+    """Read code files from GitHub clones and local git repositories."""
+
+    output_columns: ClassVar[list[str] | None] = [
+        "repo_id",
+        "repo_url",
+        "commit_sha",
+        "source_kind",
+        "repository_path",
+        "source_path",
+        "relative_path",
+        "file_name",
+        "file_extension",
+        "code_lang",
+        "size_bytes",
+        "content_sha256",
+        "content",
+    ]
+
+    def _reset_attachment_state(self) -> None:
+        """Run the base reset, remove any existing temporary directory, and clear cached repository roots."""
+        super()._reset_attachment_state()
+        if (stale_dir := getattr(self, "_temp_dir", None)) is not None:
+            stale_dir.cleanup()
+        self._temp_dir: tempfile.TemporaryDirectory[str] | None = None
+        self._repository_roots: list[RepositoryRoot] | None = None
+
+    def build_manifest(self, *, context: SeedReaderFileSystemContext) -> pd.DataFrame | list[dict[str, Any]]:
+        """Return one manifest record per matching file across all prepared repositories."""
+        # Records carry blank content fields at this stage; hydrate_row reads the files later.
+        prepared_roots = self._get_repository_roots(context)
+        manifest = [row for root in prepared_roots for row in self._build_repository_manifest(root)]
+        return manifest
+
+    def hydrate_row(
+        self,
+        *,
+        manifest_row: dict[str, Any],
+        context: SeedReaderFileSystemContext,
+    ) -> dict[str, Any] | list[dict[str, Any]]:
+        """Read the file at ``manifest_row["source_path"]`` and return the row with content and SHA-256 filled in."""
+        del context  # unused: the file is read directly from the path stored in the manifest row
+        source_path = Path(str(manifest_row["source_path"]))
+        try:
+            content_bytes = source_path.read_bytes()
+            content = content_bytes.decode(self.source.encoding)
+        except UnicodeDecodeError as error:
+            logger.warning(
+                "Skipping file %s because it cannot be decoded as %s: %s",
+                source_path,
+                self.source.encoding,
+                error,
+            )
+            return []  # an undecodable file yields no rows instead of failing the read
+        except OSError as error:
+            raise SeedReaderError(f"Failed to read repository file {source_path}: {error}") from error
+
+        record = dict(manifest_row)  # shallow copy so the caller's manifest row is not mutated
+        record["content_sha256"] = hashlib.sha256(content_bytes).hexdigest()  # digest of the raw bytes
+        record["content"] = content
+        return record
+
+    def _get_filesystem_context(self) -> SeedReaderFileSystemContext:
+        """Return the cached filesystem context, preparing the runtime root when none is cached."""
+        self._ensure_attached()
+        cached_context = getattr(self, "_filesystem_context", None)
+        if cached_context is None:
+            # Preparing the runtime root resolves local repositories and clones the GitHub ones.
+            prepared_root = self._prepare_runtime_root()
+            cached_context = self.create_filesystem_context(prepared_root)
+            self._filesystem_context = cached_context
+        return cached_context
+
+    def _prepare_runtime_root(self) -> Path:
+        """Create the temporary scan root, resolve local repositories, and clone GitHub ones into it."""
+        self._temp_dir = tempfile.TemporaryDirectory(prefix="data-designer-github-")
+        scan_root = Path(self._temp_dir.name).resolve()
+
+        # Local repositories come first, so they precede the clones in the resulting list.
+        resolved_roots = self._prepare_local_repositories()
+        github_dir = scan_root / "github"
+        github_dir.mkdir(parents=True, exist_ok=True)
+        resolved_roots += self._clone_github_repositories(github_dir)
+        if not resolved_roots:
+            raise SeedReaderError("GitHub seed source did not resolve any repositories.")
+        self.source._runtime_path = str(scan_root)
+        self._repository_roots = resolved_roots
+        return scan_root
+
+    def _get_repository_roots(self, context: SeedReaderFileSystemContext) -> list[RepositoryRoot]:
+        """Return the prepared repository roots; ``context`` is accepted but not consulted."""
+        del context
+        if (prepared := getattr(self, "_repository_roots", None)) is None:
+            raise SeedReaderError("Repository roots are not prepared.")
+        return prepared
+
+    def _prepare_local_repositories(self) -> list[RepositoryRoot]:
+        """Describe every local git repository resolved from ``path`` and ``repository_paths``."""
+        source = self.source
+        # Resolution de-duplicates top-level directories, so each repository is described once.
+        top_levels = _resolve_local_repository_paths(parent_path=source.path, repository_paths=source.repository_paths)
+        return list(map(self._build_local_repository_root, top_levels))
+
+    def _clone_github_repositories(self, clone_root: Path) -> list[RepositoryRoot]:
+        """Clone each configured GitHub repository once under ``clone_root`` and check out ``ref`` if set."""
+        roots_by_id: dict[str, RepositoryRoot] = {}
+        for repository_spec in self.source.repositories:
+            repo_id, repo_url = normalize_github_repository(repository_spec)
+            # The same repository may be listed twice (slug and URL, or differing only in letter case);
+            # cloning it again would fail on the existing destination directory or emit duplicate rows.
+            if repo_id.lower() in roots_by_id:
+                continue
+            destination = clone_root / _safe_repo_directory_name(repo_id)
+            self._clone_repository(repo_url=repo_url, destination=destination)
+            if self.source.ref is not None:
+                checkout = ["checkout", "--quiet", self.source.ref]
+                _run_git(checkout, cwd=destination, timeout=self.source.clone_timeout_seconds)
+            roots_by_id[repo_id.lower()] = RepositoryRoot(
+                repo_id=repo_id,
+                repo_url=repo_url,
+                root_path=destination,
+                source_kind="github",
+                commit_sha=_get_commit_sha(destination),
+            )
+        return list(roots_by_id.values())
+
+    def _clone_repository(self, *, repo_url: str, destination: Path) -> None:
+        """Clone ``repo_url`` into ``destination``; commit-SHA refs skip ``--branch`` and ``--depth``."""
+        # A shallow clone only holds the branch tip, so checking out an arbitrary commit afterwards would fail.
+        pinned = self.source.ref is not None and _looks_like_commit_sha(self.source.ref)
+        branch_args = [] if self.source.ref is None or pinned else ["--branch", self.source.ref]
+        depth_args = [] if self.source.clone_depth is None or pinned else ["--depth", str(self.source.clone_depth)]
+        command = ["clone", "--quiet", *branch_args, *depth_args, repo_url, str(destination)]
+        _run_git(command, timeout=self.source.clone_timeout_seconds)
+
+    def _build_local_repository_root(self, root_path: Path) -> RepositoryRoot:
+        """Describe a local repository using its origin remote (if any) and current HEAD commit."""
+        origin_url = _get_remote_url(root_path)
+        # Prefer the GitHub "owner/name" id when the origin URL parses; otherwise use the directory name.
+        repo_id = _repo_id_from_local_path(root_path, origin_url)
+        head_sha = _get_commit_sha(root_path)
+
+        # Positional arguments follow the RepositoryRoot field order: id, url, root, kind, commit.
+        return RepositoryRoot(repo_id, origin_url, root_path, "git_repository", head_sha)
+
+    def _build_repository_manifest(self, repository: RepositoryRoot) -> list[dict[str, Any]]:
+        records: list[dict[str, Any]] = []  # one metadata record per matching file; content stays blank here
+        for file_path in self._iter_matching_files(repository.root_path):
+            relative_path = file_path.relative_to(repository.root_path).as_posix()  # "/"-separated
+            stat = file_path.stat()  # unlike _should_include_file, an OSError here propagates to the caller
+            records.append(
+                {
+                    "repo_id": repository.repo_id,
+                    "repo_url": repository.repo_url,
+                    "commit_sha": repository.commit_sha,
+                    "source_kind": repository.source_kind,
+                    "repository_path": str(repository.root_path),
+                    "source_path": str(file_path),  # hydrate_row reads the file from this path
+                    "relative_path": relative_path,
+                    "file_name": file_path.name,
+                    "file_extension": file_path.suffix.lower(),
+                    "code_lang": _detect_language(file_path),
+                    "size_bytes": stat.st_size,
+                    "content_sha256": "",  # placeholder; hydrate_row fills this in
+                    "content": "",  # placeholder; hydrate_row fills this in
+                }
+            )
+        return records
+
+    def _iter_matching_files(self, root_path: Path) -> list[Path]:
+        """Return the files under ``root_path`` that match the pattern and filters, sorted by relative path."""
+        search = root_path.rglob if self.source.recursive else root_path.glob
+        kept = [
+            candidate
+            for candidate in search(self.source.file_pattern)
+            if self._should_include_file(root_path=root_path, file_path=candidate)
+        ]
+        return sorted(kept, key=lambda candidate: candidate.relative_to(root_path).as_posix())
+
+    def _should_include_file(self, *, root_path: Path, file_path: Path) -> bool:
+        """Return whether ``file_path`` is a regular file inside ``root_path`` that passes the configured filters."""
+        # Cloned repositories are untrusted: a symlink may point outside the checkout, so never read through it.
+        if not file_path.is_file() or not file_path.resolve().is_relative_to(root_path.resolve()):
+            return False
+        relative_path = file_path.relative_to(root_path).as_posix()
+        if any(fnmatchcase(relative_path, pattern) for pattern in self.source.exclude_patterns):
+            return False
+
+        try:
+            file_size = file_path.stat().st_size
+        except OSError as error:
+            logger.warning("Skipping file %s because it cannot be stat'ed: %s", file_path, error)
+            return False
+        if file_size > self.source.max_file_size_bytes:
+            return False
+
+        # Explicitly listed file names bypass the extension filter (but not the checks above).
+        if file_path.name in self.source.include_file_names:
+            return True
+        include_extensions = self.source.include_extensions
+        return include_extensions is None or file_path.suffix.lower() in include_extensions
+
+
+def normalize_github_repository(repository: str) -> tuple[str, str]:
+    """Normalize an ``owner/name`` slug or a github.com HTTP(S)/SSH remote to ``(owner/name, clone_url)``."""
+    stripped = repository.strip()
+    parsed = urlparse(stripped)
+
+    if parsed.scheme in {"http", "https", "ssh"}:
+        # ``hostname`` is lowercased and excludes userinfo/port, so ``ssh://git@github.com/...`` is accepted.
+        if parsed.hostname != "github.com":
+            raise SeedReaderError(f"Expected a github.com repository URL, got {repository!r}.")
+        candidate = parsed.path
+    else:
+        candidate = stripped.removeprefix("git@github.com:")  # scp-style remote; a plain slug is left unchanged
+    # Trim slashes before the suffix so "owner/name.git/" does not keep ".git" inside the repository id.
+    repo_id = candidate.strip("/").removesuffix(".git")
+
+    if not re.fullmatch(r"[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+", repo_id):
+        raise SeedReaderError(f"GitHub repository {repository!r} must use 'owner/name' or a github.com repository URL.")
+    return repo_id, f"https://github.com/{repo_id}.git"
+
+
+def _resolve_local_repository_paths(*, parent_path: str | None, repository_paths: list[str]) -> list[Path]:
+    """Resolve ``parent_path`` and ``repository_paths`` to unique git top-level directories, in discovery order."""
+    unique_roots: dict[Path, None] = {}  # dict keys de-duplicate while keeping insertion order
+    if parent_path is not None:
+        parent = Path(parent_path).expanduser().resolve()
+        parent_root = _get_git_toplevel(parent)
+        if parent_root is not None:
+            candidates = [parent_root]
+        else:
+            # Not inside a repository itself: probe each immediate child directory instead.
+            probed = (_get_git_toplevel(child) for child in sorted(parent.iterdir()) if child.is_dir())
+            candidates = [root for root in probed if root is not None]
+        unique_roots.update(dict.fromkeys(candidates))
+
+    for repository_path in repository_paths:
+        explicit = Path(repository_path).expanduser().resolve()
+        explicit_root = _get_git_toplevel(explicit)
+        if explicit_root is None:
+            raise SeedReaderError(f"Repository path {explicit} is not a git repository.")
+        unique_roots[explicit_root] = None
+
+    return list(unique_roots)
+
+
+def _get_git_toplevel(path: Path) -> Path | None:
+    """Return the resolved git work-tree root containing ``path``, or ``None`` when git exits non-zero."""
+    command = ["git", "-C", str(path), "rev-parse", "--show-toplevel"]
+    try:
+        result = subprocess.run(command, capture_output=True, text=True, check=False)
+    except FileNotFoundError as error:  # git executable missing; raise the same error type as _run_git
+        raise SeedReaderError("git is required but was not found on PATH.") from error
+    if result.returncode != 0:
+        return None
+    return Path(result.stdout.strip()).resolve()
+
+
+def _get_commit_sha(root_path: Path) -> str | None:
+    """Return the ``git rev-parse HEAD`` output for ``root_path``, or ``None`` when the command fails."""
+    completed = subprocess.run(
+        ["git", "-C", str(root_path), "rev-parse", "HEAD"],
+        check=False,
+        text=True,
+        capture_output=True,
+    )
+    succeeded = completed.returncode == 0
+    return completed.stdout.strip() if succeeded else None
+
+
+def _get_remote_url(root_path: Path) -> str | None:
+    """Return the configured ``remote.origin.url`` for ``root_path``, or ``None`` when unset or empty."""
+    lookup = ["git", "-C", str(root_path), "config", "--get", "remote.origin.url"]
+    completed = subprocess.run(lookup, capture_output=True, text=True, check=False)
+    if completed.returncode == 0:
+        origin_url = completed.stdout.strip()
+        if origin_url:
+            return origin_url
+    # Either git exited non-zero or the configured value is empty.
+    return None
+
+
+def _run_git(command: list[str], *, cwd: Path | None = None, timeout: int) -> None:
+    """Run ``git <command>`` and raise ``SeedReaderError`` when git is missing, times out, or exits non-zero."""
+    executable = shutil.which("git")
+    if executable is None:
+        raise SeedReaderError("git is required to read GitHub repositories, but it was not found on PATH.")
+
+    # The space-joined command is only used to label error messages.
+    rendered = " ".join(command)
+    working_dir = str(cwd) if cwd is not None else None
+    try:
+        completed = subprocess.run(
+            [executable, *command], cwd=working_dir, capture_output=True, text=True, check=False, timeout=timeout
+        )
+    except subprocess.TimeoutExpired as error:
+        raise SeedReaderError(f"git {rendered} timed out after {timeout} seconds") from error
+
+    if completed.returncode != 0:
+        # Prefer stderr for the failure detail; fall back to stdout when stderr is empty.
+        detail = completed.stderr.strip() or completed.stdout.strip()
+        raise SeedReaderError(f"git {rendered} failed: {detail}")
+
+
+def _repo_id_from_local_path(root_path: Path, remote_url: str | None) -> str:
+    """Return the GitHub ``owner/name`` parsed from ``remote_url``, else the directory name of ``root_path``."""
+    if not remote_url:
+        return root_path.name
+    try:
+        return normalize_github_repository(remote_url)[0]
+    except SeedReaderError:  # the remote is not a recognizable GitHub repository spec
+        return root_path.name
+
+
+def _safe_repo_directory_name(repo_id: str) -> str:
+    return "__".join(repo_id.split("/"))  # "owner/name" -> "owner__name", a single path component
+
+
+def _looks_like_commit_sha(ref: str) -> bool:
+    return bool(re.fullmatch(r"[0-9a-fA-F]{7,40}", ref))  # 7 to 40 hexadecimal digits, nothing else
+
+
+def _detect_language(file_path: Path) -> str:
+    suffix = file_path.suffix.lower()  # extension lookup is case-insensitive; file-name lookup is exact
+    by_extension = LANGUAGE_BY_EXTENSION.get(suffix, suffix.removeprefix("."))  # unknown suffix -> bare extension
+    return LANGUAGE_BY_FILENAME.get(file_path.name, by_extension)
diff --git a/plugins/data-designer-github/src/data_designer_github/plugin.py b/plugins/data-designer-github/src/data_designer_github/plugin.py
new file mode 100644
index 0000000..8a81988
--- /dev/null
+++ b/plugins/data-designer-github/src/data_designer_github/plugin.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from data_designer.plugins.plugin import Plugin, PluginType
+
+plugin = Plugin(
+    config_qualified_name="data_designer_github.config.GitHubSeedSource",
+    impl_qualified_name="data_designer_github.impl.GitHubSeedReader",
+    plugin_type=PluginType.SEED_READER,
+)
diff --git a/plugins/data-designer-github/tests/test_plugin.py b/plugins/data-designer-github/tests/test_plugin.py
new file mode 100644
index 0000000..be82445
--- /dev/null
+++ b/plugins/data-designer-github/tests/test_plugin.py
@@ -0,0 +1,104 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import subprocess
+from pathlib import Path
+
+import pytest
+from data_designer.config.config_builder import DataDesignerConfigBuilder
+from data_designer.engine.secret_resolver import PlaintextResolver
+from data_designer.engine.testing.utils import assert_valid_plugin
+from data_designer.interface.data_designer import DataDesigner
+
+from data_designer_github.config import GitHubSeedSource
+from data_designer_github.impl import GitHubSeedReader, normalize_github_repository
+from data_designer_github.plugin import plugin
+
+
+def test_valid_plugin() -> None:
+    assert_valid_plugin(plugin)
+
+
+def test_normalize_github_repository() -> None:
+    """Slug and ``.git`` URL specs normalize to the same repository id."""
+    expected_repo_id = "NVIDIA-NeMo/DataDesigner"
+    for spec in ("NVIDIA-NeMo/DataDesigner", "https://github.com/NVIDIA-NeMo/DataDesigner.git"):
+        assert normalize_github_repository(spec)[0] == expected_repo_id
+
+
+def test_source_requires_at_least_one_repository_source() -> None:
+    with pytest.raises(ValueError, match="At least one"):
+        GitHubSeedSource()
+
+
+def test_reader_hydrates_local_repository_files(tmp_path: Path) -> None:
+    repo = _create_git_repo(tmp_path / "sample-repo")
+    source = GitHubSeedSource(repository_paths=[str(repo)], file_pattern="*.py")
+    reader = GitHubSeedReader()
+    reader.attach(source, PlaintextResolver())
+
+    assert reader.get_seed_dataset_size() == 1  # README.md does not match file_pattern="*.py"
+    batch = reader.create_batch_reader(batch_size=10, index_range=None, shuffle=False).read_next_batch()
+    rows = batch.to_pandas().to_dict(orient="records")
+
+    assert len(rows) == 1
+    row = rows[0]
+    assert row["repo_id"] == "sample-repo"  # the fixture sets no origin remote, so the directory name is used
+    assert row["source_kind"] == "git_repository"
+    assert row["relative_path"] == "src/example.py"
+    assert row["file_name"] == "example.py"
+    assert row["file_extension"] == ".py"
+    assert row["code_lang"] == "python"
+    assert row["size_bytes"] > 0
+    assert len(row["commit_sha"]) == 40  # `git rev-parse HEAD` output for the fixture commit
+    assert len(row["content_sha256"]) == 64  # hexadecimal SHA-256 digest
+    assert "def greet" in row["content"]
+
+
+def test_parent_path_discovers_child_git_repositories(tmp_path: Path) -> None:
+    repo = _create_git_repo(tmp_path / "repos" / "child-repo")
+    source = GitHubSeedSource(path=str(repo.parent), file_pattern="*.py")  # directory containing the repository
+    reader = GitHubSeedReader()
+    reader.attach(source, PlaintextResolver())
+
+    batch = reader.create_batch_reader(batch_size=10, index_range=None, shuffle=False).read_next_batch()
+    rows = batch.to_pandas().to_dict(orient="records")
+
+    assert [row["repo_id"] for row in rows] == ["child-repo"]  # one *.py file in the single child repository
+
+
+def test_preview_uses_github_seed_reader(tmp_path: Path) -> None:
+    repo = _create_git_repo(tmp_path / "preview-repo")
+    builder = DataDesignerConfigBuilder()
+    builder.with_seed_dataset(GitHubSeedSource(repository_paths=[str(repo)], file_pattern="*.py"))
+    builder.add_column(name="_row_id", column_type="sampler", sampler_type="uuid", params={})
+
+    result = DataDesigner(artifact_path=tmp_path / "artifacts").preview(builder, num_records=1)
+
+    assert result.dataset is not None
+    assert list(result.dataset["repo_id"]) == ["preview-repo"]  # directory name of the fixture repository
+    assert list(result.dataset["relative_path"]) == ["src/example.py"]
+    assert "def greet" in result.dataset["content"].iloc[0]  # content comes from the fixture's example.py
+
+
+def _create_git_repo(path: Path) -> Path:
+    """Create a one-commit git repository at ``path`` containing ``src/example.py`` and ``README.md``."""
+    (path / "src").mkdir(parents=True)
+    (path / "src" / "example.py").write_text(
+        "import os\n\n\ndef greet(name: str) -> str:\n    return f'hello {name} from {os.getcwd()}'\n",
+        encoding="utf-8",
+    )
+    (path / "README.md").write_text("# Sample\n", encoding="utf-8")
+    _git(path, "init", "--quiet")
+    # Repository-local identity and no signing, so the commit does not depend on the developer's global git config.
+    for setting in ("user.email=test@example.com", "user.name=Test User", "commit.gpgsign=false"):
+        _git(path, "config", *setting.split("=", 1))
+    _git(path, "add", ".")
+    _git(path, "commit", "--quiet", "-m", "initial")
+    return path
+
+
+def _git(cwd: Path, *args: str) -> None:
+    subprocess.check_call(["git", *args], cwd=cwd)  # raises CalledProcessError when git exits non-zero
diff --git a/pyproject.toml b/pyproject.toml
index cffc13f..838281c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ ignore = [
 ]
 
 [tool.ruff.lint.isort]
-known-first-party = ["ddp", "data_designer_template"]
+known-first-party = ["ddp", "data_designer_github", "data_designer_template"]
 
 [tool.ruff.lint.flake8-tidy-imports]
 ban-relative-imports = "all"
diff --git a/uv.lock b/uv.lock
index 97c44fd..c8780c1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -9,6 +9,7 @@ resolution-markers = [
 
 [manifest]
 members = [
+    "data-designer-github",
     "data-designer-plugins-workspace",
     "data-designer-template",
     "ddp",
@@ -422,6 +423,17 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/35/d4/3844529ae989be9e63b0b8f47c28492793993427dc7d54d6d2a923ad2acc/data_designer_engine-0.5.7-py3-none-any.whl", hash = "sha256:75cd7d5ad0b230ddf75950ba7f97c9ad75c54887ad1247cdf623dc008e31a418", size = 631945, upload-time = "2026-04-17T22:03:08.584Z" },
 ]
 
+[[package]]
+name = "data-designer-github"
+version = "0.1.0"
+source = { editable = "plugins/data-designer-github" }
+dependencies = [
+    { name = "data-designer" },
+]
+
+[package.metadata]
+requires-dist = [{ name = "data-designer", specifier = ">=0.5.7" }]
+
 [[package]]
 name = "data-designer-plugins-workspace"
 version = "0.0.0"
diff --git a/zensical.toml b/zensical.toml
index 3f1af80..8f3dec1 100644
--- a/zensical.toml
+++ b/zensical.toml
@@ -19,6 +19,10 @@ nav = [
   {"Plugins" = [
     {"Overview" = "plugins/index.md"},
     # BEGIN GENERATED PLUGIN DOCS NAV
+    {"data-designer-github" = [
+      {"Overview" = "plugins/data-designer-github/index.md"},
+      {"Usage" = "plugins/data-designer-github/usage.md"},
+    ]},
     {"data-designer-template" = [
       {"Overview" = "plugins/data-designer-template/index.md"},
       {"Usage" = "plugins/data-designer-template/usage.md"},