Skip to content

Commit 0e9c829

Browse files
[GitHub] Add exclude.size property to the config (#137)
1 parent c35f6bc commit 0e9c829

File tree

6 files changed

+73
-0
lines changed

6 files changed

+73
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Added config options `settings.reindexInterval` and `settings.resyncInterval` to control how often the index should be re-indexed and re-synced. ([#134](https://github.com/sourcebot-dev/sourcebot/pull/134))
13+
- Added `exclude.size` to the GitHub config to allow excluding repositories by size. ([#137](https://github.com/sourcebot-dev/sourcebot/pull/137))
1314

1415
## [2.6.2] - 2024-12-13
1516

demo-site-config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
"token": {
1212
"env": "GITHUB_TOKEN"
1313
},
14+
"exclude": {
15+
"size": {
16+
"max": 1000000000 // Limit to 1GB
17+
}
18+
},
1419
"repos": [
1520
"torvalds/linux",
1621
"pytorch/pytorch",

packages/backend/src/github.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type OctokitRepository = {
2222
forks_count?: number,
2323
archived?: boolean,
2424
topics?: string[],
25+
size?: number,
2526
}
2627

2728
export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => {
@@ -94,6 +95,7 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
9495
'zoekt.fork': marshalBool(repo.fork),
9596
'zoekt.public': marshalBool(repo.private === false)
9697
},
98+
sizeInBytes: repo.size ? repo.size * 1000 : undefined,
9799
branches: [],
98100
tags: [],
99101
} satisfies GitRepository;
@@ -121,6 +123,42 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
121123
const topics = config.exclude.topics.map(topic => topic.toLowerCase());
122124
repos = excludeReposByTopic(repos, topics, logger);
123125
}
126+
127+
if (config.exclude.size) {
128+
const min = config.exclude.size.min;
129+
const max = config.exclude.size.max;
130+
if (min) {
131+
repos = repos.filter((repo) => {
132+
// If we don't have a size, we can't filter by size.
133+
if (!repo.sizeInBytes) {
134+
return true;
135+
}
136+
137+
if (repo.sizeInBytes < min) {
138+
logger.debug(`Excluding repo ${repo.name}. Reason: repo is less than \`exclude.size.min\`=${min} bytes.`);
139+
return false;
140+
}
141+
142+
return true;
143+
});
144+
}
145+
146+
if (max) {
147+
repos = repos.filter((repo) => {
148+
// If we don't have a size, we can't filter by size.
149+
if (!repo.sizeInBytes) {
150+
return true;
151+
}
152+
153+
if (repo.sizeInBytes > max) {
154+
logger.debug(`Excluding repo ${repo.name}. Reason: repo is greater than \`exclude.size.max\`=${max} bytes.`);
155+
return false;
156+
}
157+
158+
return true;
159+
});
160+
}
161+
}
124162
}
125163

126164
logger.debug(`Found ${repos.length} total repositories.`);

packages/backend/src/schemas/v2.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,19 @@ export interface GitHubConfig {
8989
* List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
9090
*/
9191
topics?: string[];
92+
/**
93+
* Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.
94+
*/
95+
size?: {
96+
/**
97+
* Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing.
98+
*/
99+
min?: number;
100+
/**
101+
* Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing.
102+
*/
103+
max?: number;
104+
};
92105
};
93106
revisions?: GitRevisions;
94107
}

packages/backend/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ interface BaseRepository {
99
isArchived?: boolean;
1010
codeHost?: string;
1111
topics?: string[];
12+
sizeInBytes?: number;
1213
}
1314

1415
export interface GitRepository extends BaseRepository {

schemas/v2/index.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,21 @@
171171
"examples": [
172172
["tests", "ci"]
173173
]
174+
},
175+
"size": {
176+
"type": "object",
177+
"description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.",
178+
"properties": {
179+
"min": {
180+
"type": "integer",
181+
"description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
182+
},
183+
"max": {
184+
"type": "integer",
185+
"description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
186+
}
187+
},
188+
"additionalProperties": false
174189
}
175190
},
176191
"additionalProperties": false

0 commit comments

Comments
 (0)