@@ -12,6 +12,7 @@ import { PromClient } from './promClient.js';
1212import * as Sentry from "@sentry/node" ;
1313
/**
 * Contract implemented by `RepoManager`.
 */
interface IRepoManager {
    /**
     * Checks repos that the DB marks as INDEXED for shard files on disk and
     * resets those with none so they are picked up for indexing again.
     * Resolves once the check (and any resulting DB update) has finished.
     */
    validateIndexedReposHaveShards: () => Promise<void>;
    /** Starts the manager's poll loop. NOTE(review): implementation is outside this chunk — confirm blocking semantics against the class. */
    blockingPollLoop: () => void;
    /** Releases whatever the manager holds. NOTE(review): implementation is outside this chunk — confirm what is torn down. */
    dispose: () => void;
}
@@ -526,6 +527,61 @@ export class RepoManager implements IRepoManager {
526527 }
527528 }
528529
530+ ///////////////////////////
531+ // Repo index validation
532+ ///////////////////////////
533+
/**
 * Verifies that every repo the DB marks as INDEXED has at least one shard
 * file in `this.ctx.indexPath`, and resets repos with none to NEW so they
 * are scheduled for indexing again.
 *
 * The index directory is listed once and the listing is reused for every
 * repo. If the directory cannot be read, the error is logged once and no
 * repo is modified.
 *
 * @returns A promise that resolves when validation (and any DB update) is done.
 */
public async validateIndexedReposHaveShards(): Promise<void> {
    logger.info('Validating indexed repos have shards...');

    const indexedRepos = await this.db.repo.findMany({
        where: {
            repoIndexingStatus: RepoIndexingStatus.INDEXED
        }
    });
    logger.info(`Found ${indexedRepos.length} repos in the DB marked as INDEXED`);

    if (indexedRepos.length === 0) {
        return;
    }

    // The directory listing does not depend on the repo being checked, so read
    // it once up front instead of once per repo.
    let indexFiles: string[];
    try {
        indexFiles = readdirSync(this.ctx.indexPath);
    } catch (error) {
        // Without a listing we cannot tell which repos are missing shards; leave
        // every repo untouched rather than guess.
        logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
        return;
    }

    const reposToReindex: number[] = [];

    for (const repo of indexedRepos) {
        const shardPrefix = getShardPrefix(repo.orgId, repo.id);

        // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
        // would need to know how many total shards are expected for this repo
        const hasShards = indexFiles.some(file => file.startsWith(shardPrefix));

        if (!hasShards) {
            logger.info(`Repo ${repo.displayName} (id: ${repo.id}) is marked as INDEXED but has no shards on disk. Marking for reindexing.`);
            reposToReindex.push(repo.id);
        }
    }

    if (reposToReindex.length > 0) {
        // Single bulk update: flip every affected repo back to NEW in one query.
        await this.db.repo.updateMany({
            where: {
                id: { in: reposToReindex }
            },
            data: {
                repoIndexingStatus: RepoIndexingStatus.NEW
            }
        });
        logger.info(`Marked ${reposToReindex.length} repos for reindexing due to missing shards`);
    }

    logger.info('Done validating indexed repos have shards');
}
584+
529585 private async fetchAndScheduleRepoTimeouts ( ) {
530586 const repos = await this . db . repo . findMany ( {
531587 where : {
0 commit comments