diff --git a/docs/api/methods.md b/docs/api/methods.md index 780e800d..32660b37 100644 --- a/docs/api/methods.md +++ b/docs/api/methods.md @@ -1593,7 +1593,7 @@ Returns `null` on success. The scraper continues after the response is sent. Return the latest known metadata scraper status. -This method behaves like `media` does for indexing status: clients can query the current scrape snapshot after opening a UI, then continue listening for `media.scraping` notifications. If no scrape has run since startup, the result is idle with `scraping: false` and `done: false`. +This method behaves like `media` does for indexing status: clients can query the current scrape snapshot after opening a UI, then continue listening for `media.scraping` notifications. If no scrape has run since startup, the result is idle with `scraping: false`, `done: false`, and `state: "idle"`. Existing flat counter fields remain for compatibility; new UIs should prefer `currentSystem` for per-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run progress. #### Parameters @@ -1613,6 +1613,12 @@ None. | scraping | boolean | Yes | Whether a scrape is currently running. | | done | boolean | Yes | Whether the latest scrape reached a terminal state. | | paused | boolean | Yes | Whether the active scrape is paused because media is running or until resumed. | +| state | string | No | Explicit lifecycle state: `idle`, `running`, `paused`, `completed`, `cancelled`, or `failed`. | +| error | string | No | Fatal scrape error on failed terminal updates. | +| totalSteps | integer | No | Total systems in the scrape run, when known. | +| currentStep | integer | No | 1-based current system step, when known. | +| currentStepDisplay | string | No | Display name for the current system step, falling back to system ID. | +| currentSystem | object | No | Per-system progress object with `systemId`, `systemName`, `processed`, `total`, `matched`, and `skipped`. | #### Example @@ -1642,7 +1648,19 @@ None. "totalScraped": 1200, "scraping": true, "done": false, - "paused": false + "paused": false, + "state": "running", + "totalSteps": 2, + "currentStep": 1, + "currentStepDisplay": "Super Nintendo Entertainment System", + "currentSystem": { + "systemId": "snes", + "systemName": "Super Nintendo Entertainment System", + "processed": 42, + "total": 100, + "matched": 38, + "skipped": 4 + } } } ``` diff --git a/docs/api/notifications.md b/docs/api/notifications.md index 57e5a0b6..06ade847 100644 --- a/docs/api/notifications.md +++ b/docs/api/notifications.md @@ -217,7 +217,7 @@ Sent during media database generation to indicate indexing progress and completi Sent while a metadata scraper run is active and when it completes. -The first notification for a scraper run identifies the scraper and sets `scraping` to true. Progress notifications include the current system, counters, pause state, and completion state. A final notification has `scraping` set to false and `done` set to true. +The first notification for a scraper run identifies the scraper and sets `scraping` to true. Progress notifications include the current system, per-system counters, whole-run system-step progress, pause state, and completion state. A final notification has `scraping` set to false and `done` set to true. Existing flat counter fields remain for compatibility; new UIs should prefer `currentSystem` for per-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run progress. #### Parameters @@ -233,6 +233,12 @@ The first notification for a scraper run identifies the scraper and sets `scrapi | scraping | boolean | Yes | True while scraping is active. | | done | boolean | Yes | True on the terminal update for the scraper run. | | paused | boolean | Yes | True when the active scrape is paused. | +| state | string | No | Explicit lifecycle state: `idle`, `running`, `paused`, `completed`, `cancelled`, or `failed`. | +| error | string | No | Fatal scrape error on failed terminal updates. | +| totalSteps | number | No | Total systems in the scrape run, when known. | +| currentStep | number | No | 1-based current system step, when known. | +| currentStepDisplay | string | No | Display name for the current system step, falling back to system ID. | +| currentSystem | object | No | Per-system progress object with `systemId`, `systemName`, `processed`, `total`, `matched`, and `skipped`. | #### Examples @@ -252,7 +258,19 @@ The first notification for a scraper run identifies the scraper and sets `scrapi "totalScraped": 1200, "scraping": true, "done": false, - "paused": false + "paused": false, + "state": "running", + "totalSteps": 12, + "currentStep": 3, + "currentStepDisplay": "Super Nintendo Entertainment System", + "currentSystem": { + "systemId": "SNES", + "systemName": "Super Nintendo Entertainment System", + "processed": 42, + "total": 100, + "matched": 38, + "skipped": 4 + } } } ``` @@ -273,7 +291,19 @@ The first notification for a scraper run identifies the scraper and sets `scrapi "totalScraped": 1250, "scraping": false, "done": true, - "paused": false + "paused": false, + "state": "completed", + "totalSteps": 12, + "currentStep": 12, + "currentStepDisplay": "Super Nintendo Entertainment System", + "currentSystem": { + "systemId": "SNES", + "systemName": "Super Nintendo Entertainment System", + "processed": 100, + "total": 100, + "matched": 92, + "skipped": 8 + } } } ``` diff --git a/docs/scraper.md b/docs/scraper.md index f30af6a5..e1b29b9f 100644 --- a/docs/scraper.md +++ b/docs/scraper.md @@ -197,11 +197,23 @@ Progress is queryable with `media.scrape.status` and broadcast as `media.scrapin "totalScraped": 1000, "scraping": true, "done": false, - "paused": false + "paused": false, + "state": "running", + "totalSteps": 2, + "currentStep": 1, + "currentStepDisplay": "Super Nintendo Entertainment System", + "currentSystem": { + "systemId": "snes", + "systemName": "Super Nintendo Entertainment System", + "processed": 42, + "total": 100, + "matched": 38, + "skipped": 4 + } } ``` -`totalScraped` is derived from scraper sentinel tags in the database, not from the current run's `matched` count. +`totalScraped` is derived from scraper sentinel tags in the database, not from the current run's `matched` count. Existing flat fields stay for compatibility; new UIs should use `currentSystem` for current-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run system-step progress. Only one scraper can run at a time, and scraping is mutually exclusive with media indexing. diff --git a/pkg/api/methods/media_scrape.go b/pkg/api/methods/media_scrape.go index 5e40c7c6..085424d2 100644 --- a/pkg/api/methods/media_scrape.go +++ b/pkg/api/methods/media_scrape.go @@ -28,6 +28,7 @@ import ( "github.com/ZaparooProject/zaparoo-core/v2/pkg/api/models/requests" "github.com/ZaparooProject/zaparoo-core/v2/pkg/api/notifications" "github.com/ZaparooProject/zaparoo-core/v2/pkg/api/validation" + "github.com/ZaparooProject/zaparoo-core/v2/pkg/assets" "github.com/ZaparooProject/zaparoo-core/v2/pkg/database" "github.com/ZaparooProject/zaparoo-core/v2/pkg/database/scraper" "github.com/ZaparooProject/zaparoo-core/v2/pkg/helpers/syncutil" @@ -38,7 +39,15 @@ import ( // scrapingStatus tracks the lifecycle of an active media.scrape operation. // It mirrors the indexingStatus pattern in media.go for consistent state // management and safe concurrent access. -const scrapeTotalScrapedRefreshInterval = 5 * time.Second +const ( + scrapeTotalScrapedRefreshInterval = 5 * time.Second + scrapeStateIdle = "idle" + scrapeStateRunning = "running" + scrapeStatePaused = "paused" + scrapeStateCompleted = "completed" + scrapeStateCancelled = "cancelled" + scrapeStateFailed = "failed" +) type scrapedCountCache struct { lastRefresh time.Time @@ -67,6 +76,7 @@ func (s *scrapingStatus) startIfNotRunning(scraperID string) bool { s.countCache = scrapedCountCache{} s.latest = models.ScrapingStatusResponse{ ScraperID: scraperID, + State: scrapeStateRunning, Scraping: true, } return true @@ -122,6 +132,7 @@ func (s *scrapingStatus) cancel() bool { s.latest.Scraping = false s.latest.Done = true s.latest.Paused = false + s.latest.State = scrapeStateCancelled // Do NOT clear running/scraperID here. The goroutine's deferred // clearIfOwner call is the single writer for those fields, preventing // a new scrape from starting only to have its state cleared by the @@ -243,10 +254,92 @@ func queryScrapedMediaCount(ctx context.Context, db *database.Database, scraperI return count, true } +func systemProgressDisplay(systemID string) string { + if systemID == "" { + return "" + } + md, err := assets.GetSystemMetadata(systemID) + if err != nil || md.Name == "" { + return systemID + } + return md.Name +} + +func ptrIfPositive(v int) *int { + if v <= 0 { + return nil + } + return &v +} + +func ptrIfNotEmpty(v string) *string { + if v == "" { + return nil + } + return &v +} + +func scrapeState(scrapeCtx context.Context, update *scraper.ScrapeUpdate, paused bool) string { + switch { + case update.FatalErr != nil: + return scrapeStateFailed + case update.Done && scrapeCtx != nil && scrapeCtx.Err() != nil: + return scrapeStateCancelled + case update.Done: + return scrapeStateCompleted + case paused: + return scrapeStatePaused + default: + return scrapeStateRunning + } +} + +func scrapingStatusFromUpdate( + scrapeCtx context.Context, + scraperID string, + update *scraper.ScrapeUpdate, + paused bool, +) models.ScrapingStatusResponse { + display := systemProgressDisplay(update.SystemID) + status := models.ScrapingStatusResponse{ + ScraperID: scraperID, + SystemID: update.SystemID, + Processed: update.Processed, + Total: update.Total, + Matched: update.Matched, + Skipped: update.Skipped, + Scraping: !update.Done, + Done: update.Done, + Paused: paused && !update.Done, + State: scrapeState(scrapeCtx, update, paused && !update.Done), + TotalSteps: ptrIfPositive(update.TotalSteps), + CurrentStep: ptrIfPositive(update.CurrentStep), + CurrentStepDisplay: ptrIfNotEmpty(display), + } + if update.FatalErr != nil { + status.Error = update.FatalErr.Error() + } + if update.SystemID != "" { + status.CurrentSystem = &models.ScrapeSystemProgressResponse{ + SystemID: update.SystemID, + SystemName: display, + Processed: update.Processed, + Total: update.Total, + Matched: update.Matched, + Skipped: update.Skipped, + } + } + return status +} + func PublishScrapePauseStatus(ns chan<- models.Notification, paused bool) { status := scrapingStatusInstance.getLatest() status.Scraping = true status.Paused = paused + status.State = scrapeStateRunning + if paused { + status.State = scrapeStatePaused + } publishScrapingStatus(ns, &status) } @@ -300,8 +393,13 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic ns := env.State.Notifications db := env.Database + initialState := scrapeStateRunning + if paused { + initialState = scrapeStatePaused + } initialStatus := models.ScrapingStatusResponse{ ScraperID: params.ScraperID, + State: initialState, Scraping: true, Paused: paused, } @@ -320,17 +418,8 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic if update.Done { receivedDone = true } - status := models.ScrapingStatusResponse{ - ScraperID: scraperID, - SystemID: update.SystemID, - Processed: update.Processed, - Total: update.Total, - Matched: update.Matched, - Skipped: update.Skipped, - Scraping: !update.Done, - Done: update.Done, - Paused: env.ScrapePauser != nil && env.ScrapePauser.IsPaused() && !update.Done, - } + paused := env.ScrapePauser != nil && env.ScrapePauser.IsPaused() + status := scrapingStatusFromUpdate(scrapeCtx, scraperID, &update, paused) if update.Done { populateScrapedMediaCountExact(env.State.GetContext(), db, &status) } else { @@ -344,21 +433,14 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic // Otherwise the channel already delivered the final counters and sending // another zeroed-out Done would overwrite them for consumers. if !receivedDone { - status := scrapingStatusInstance.getLatest() - status.ScraperID = scraperID - status.Scraping = false - status.Done = true - status.Paused = false - terminalStatus := models.ScrapingStatusResponse{ - ScraperID: status.ScraperID, - SystemID: status.SystemID, - Processed: status.Processed, - Total: status.Total, - Matched: status.Matched, - Skipped: status.Skipped, - Scraping: status.Scraping, - Done: status.Done, - Paused: status.Paused, + terminalStatus := scrapingStatusInstance.getLatest() + terminalStatus.ScraperID = scraperID + terminalStatus.Scraping = false + terminalStatus.Done = true + terminalStatus.Paused = false + terminalStatus.State = scrapeStateCompleted + if scrapeCtx.Err() != nil { + terminalStatus.State = scrapeStateCancelled } populateScrapedMediaCountExact(env.State.GetContext(), db, &terminalStatus) publishScrapingStatus(ns, &terminalStatus) @@ -374,8 +456,18 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic //nolint:gocritic // API handler signature; large env param cannot be passed by pointer func HandleMediaScrapeStatus(env requests.RequestEnv) (any, error) { status := scrapingStatusInstance.getLatest() + if status.State == "" { + status.State = scrapeStateIdle + if status.Scraping { + status.State = scrapeStateRunning + } + } if status.Scraping && env.ScrapePauser != nil { status.Paused = env.ScrapePauser.IsPaused() + status.State = scrapeStateRunning + if status.Paused { + status.State = scrapeStatePaused + } } if env.Database == nil || env.Database.MediaDB == nil { return status, nil diff --git a/pkg/api/methods/media_scrape_test.go b/pkg/api/methods/media_scrape_test.go index 666fd556..c7a98a4b 100644 --- a/pkg/api/methods/media_scrape_test.go +++ b/pkg/api/methods/media_scrape_test.go @@ -343,6 +343,7 @@ func TestHandleMediaScrapeStatus_TracksLatestProgress(t *testing.T) { assert.Equal(t, models.ScrapingStatusResponse{ ScraperID: "test-scraper", SystemID: "SNES", + State: "running", Processed: 42, Total: 100, Matched: 38, @@ -484,6 +485,7 @@ func TestHandleMediaScrapeCancel_CancelsActive(t *testing.T) { require.True(t, ok) assert.False(t, status.Scraping) assert.True(t, status.Done) + assert.Equal(t, "cancelled", status.State) } func TestHandleMediaScrapeResume_ResumesPausedScrape(t *testing.T) { @@ -518,6 +520,7 @@ func TestHandleMediaScrapeResume_ResumesPausedScrape(t *testing.T) { require.NoError(t, json.Unmarshal(notif.Params, &payload)) assert.True(t, payload.Scraping) assert.False(t, payload.Paused) + assert.Equal(t, "running", payload.State) } func TestHandleMediaScrapeResume_NilPauser(t *testing.T) { @@ -645,6 +648,72 @@ func TestHandleMediaScrape_CachesProgressScrapedCountAndRefreshesDone(t *testing mockDB.AssertExpectations(t) } +func TestHandleMediaScrape_EmitsFatalStatus(t *testing.T) { + // Not parallel — manipulates shared scrapingStatusInstance. + ClearScrapingStatus() + statusInstance.clear() + + mockDB := testhelpers.NewMockMediaDBI() + mockDB.On("TrackBackgroundOperation").Return() + mockDB.On("BackgroundOperationDone").Return() + mockDB.On("GetScrapedMediaCount", assertmock.Anything, "fatal-scraper").Return(0, nil) + + scrapeErr := errors.New("parse failed") + fatalScraper := platforms.Scraper{ + ID: "fatal-scraper", + Name: "Fatal Scraper", + Scrape: func( + _ context.Context, _ *config.Instance, _ platforms.Platform, + _ afero.Fs, _ *database.Database, _ scraper.ScrapeOptions, + _ platforms.ScraperCustomOptions, ch chan<- scraper.ScrapeUpdate, + ) error { + go func() { + defer close(ch) + ch <- scraper.ScrapeUpdate{ + SystemID: "SNES", FatalErr: scrapeErr, Done: true, TotalSteps: 2, CurrentStep: 1, + } + }() + return nil + }, + } + + pl := mocks.NewMockPlatform() + pl.On("Scrapers", assertmock.Anything).Return(map[string]platforms.Scraper{"fatal-scraper": fatalScraper}) + pl.SetupBasicMock() + st, ns := state.NewState(pl, "test") + t.Cleanup(st.StopService) + + _, err := HandleMediaScrape(requests.RequestEnv{ + Context: context.Background(), + Platform: pl, + State: st, + Database: &database.Database{MediaDB: mockDB}, + Params: json.RawMessage(`{"scraperId":"fatal-scraper"}`), + }) + require.NoError(t, err) + + var gotFatal bool + timeout := time.After(2 * time.Second) + for !gotFatal { + select { + case n := <-ns: + if n.Method != models.NotificationMediaScraping { + continue + } + var p models.ScrapingStatusResponse + require.NoError(t, json.Unmarshal(n.Params, &p)) + if p.Done { + assert.Equal(t, "failed", p.State) + assert.Equal(t, "parse failed", p.Error) + gotFatal = true + } + case <-timeout: + t.Fatal("timed out waiting for fatal media.scraping notification") + } + } + mockDB.AssertExpectations(t) +} + func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) { // Not parallel — manipulates shared scrapingStatusInstance. ClearScrapingStatus() @@ -665,9 +734,13 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) { ) error { go func() { defer close(ch) - ch <- scraper.ScrapeUpdate{SystemID: "SNES", Total: 10, Processed: 5, Matched: 4, Skipped: 1} ch <- scraper.ScrapeUpdate{ - SystemID: "SNES", Total: 10, Processed: 10, Matched: 8, Skipped: 2, Done: true, + SystemID: "SNES", Total: 10, Processed: 5, Matched: 4, Skipped: 1, + TotalSteps: 2, CurrentStep: 1, + } + ch <- scraper.ScrapeUpdate{ + SystemID: "SNES", Total: 10, Processed: 10, Matched: 8, Skipped: 2, + TotalSteps: 2, CurrentStep: 1, Done: true, } }() return nil @@ -696,6 +769,7 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) { var gotDone bool var maxProcessed int + var sawCurrentSystem bool timeout := time.After(2 * time.Second) for !gotDone { select { @@ -708,6 +782,17 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) { if p.Processed > maxProcessed { maxProcessed = p.Processed } + if p.CurrentSystem != nil && !p.Done { + sawCurrentSystem = true + assert.Equal(t, "SNES", p.CurrentSystem.SystemID) + assert.Equal(t, p.Processed, p.CurrentSystem.Processed) + assert.Equal(t, p.Total, p.CurrentSystem.Total) + require.NotNil(t, p.TotalSteps) + require.NotNil(t, p.CurrentStep) + assert.Equal(t, 2, *p.TotalSteps) + assert.Equal(t, 1, *p.CurrentStep) + assert.Equal(t, "running", p.State) + } if p.Done { gotDone = true } @@ -717,6 +802,7 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) { } assert.Equal(t, 10, maxProcessed, "progress updates should reflect actual processed count") + assert.True(t, sawCurrentSystem, "progress updates should expose currentSystem and step fields") require.Eventually(t, func() bool { return !IsScrapingRunning() }, 2*time.Second, 10*time.Millisecond) diff --git a/pkg/api/models/responses.go b/pkg/api/models/responses.go index 1c90730b..cd331d34 100644 --- a/pkg/api/models/responses.go +++ b/pkg/api/models/responses.go @@ -290,19 +290,34 @@ type MediaImageResponse struct { TypeTag string `json:"typeTag"` // e.g. "property:image-boxart" } +type ScrapeSystemProgressResponse struct { + SystemID string `json:"systemId"` + SystemName string `json:"systemName,omitempty"` + Processed int `json:"processed"` + Total int `json:"total"` + Matched int `json:"matched"` + Skipped int `json:"skipped"` +} + // ScrapingStatusResponse is broadcast as a "media.scraping" notification for // each ScrapeUpdate received from the scraper and on completion/cancellation. type ScrapingStatusResponse struct { - ScraperID string `json:"scraperId,omitempty"` - SystemID string `json:"systemId,omitempty"` - Processed int `json:"processed"` - Total int `json:"total"` - Matched int `json:"matched"` - Skipped int `json:"skipped"` - TotalScraped int `json:"totalScraped"` - Scraping bool `json:"scraping"` - Done bool `json:"done"` - Paused bool `json:"paused"` + CurrentStep *int `json:"currentStep,omitempty"` + CurrentStepDisplay *string `json:"currentStepDisplay,omitempty"` + TotalSteps *int `json:"totalSteps,omitempty"` + CurrentSystem *ScrapeSystemProgressResponse `json:"currentSystem,omitempty"` + ScraperID string `json:"scraperId,omitempty"` + SystemID string `json:"systemId,omitempty"` + State string `json:"state,omitempty"` + Error string `json:"error,omitempty"` + Processed int `json:"processed"` + Total int `json:"total"` + Matched int `json:"matched"` + Skipped int `json:"skipped"` + TotalScraped int `json:"totalScraped"` + Scraping bool `json:"scraping"` + Done bool `json:"done"` + Paused bool `json:"paused"` } type MediaLookupMatch struct { diff --git a/pkg/api/notifications/notifications_test.go b/pkg/api/notifications/notifications_test.go index 5b631567..0510f54e 100644 --- a/pkg/api/notifications/notifications_test.go +++ b/pkg/api/notifications/notifications_test.go @@ -252,14 +252,29 @@ func TestMediaScraping_Payload(t *testing.T) { ns := make(chan models.Notification, 1) + totalSteps := 5 + currentStep := 2 + currentStepDisplay := "Nintendo Entertainment System" payload := models.ScrapingStatusResponse{ - ScraperID: "gamelist.xml", - SystemID: "nes", - Processed: 42, - Total: 100, - Matched: 38, - Skipped: 4, - Scraping: true, + ScraperID: "gamelist.xml", + SystemID: "nes", + State: "running", + Processed: 42, + Total: 100, + Matched: 38, + Skipped: 4, + Scraping: true, + TotalSteps: &totalSteps, + CurrentStep: ¤tStep, + CurrentStepDisplay: ¤tStepDisplay, + CurrentSystem: &models.ScrapeSystemProgressResponse{ + SystemID: "nes", + SystemName: currentStepDisplay, + Processed: 42, + Total: 100, + Matched: 38, + Skipped: 4, + }, } MediaScraping(ns, &payload) @@ -276,6 +291,15 @@ func TestMediaScraping_Payload(t *testing.T) { assert.Equal(t, payload.Total, received.Total) assert.Equal(t, payload.Matched, received.Matched) assert.Equal(t, payload.Skipped, received.Skipped) + assert.Equal(t, payload.State, received.State) + require.NotNil(t, received.TotalSteps) + require.NotNil(t, received.CurrentStep) + require.NotNil(t, received.CurrentStepDisplay) + require.NotNil(t, received.CurrentSystem) + assert.Equal(t, totalSteps, *received.TotalSteps) + assert.Equal(t, currentStep, *received.CurrentStep) + assert.Equal(t, currentStepDisplay, *received.CurrentStepDisplay) + assert.Equal(t, "nes", received.CurrentSystem.SystemID) assert.True(t, received.Scraping) } diff --git a/pkg/database/scraper/gamelistxml/scraper.go b/pkg/database/scraper/gamelistxml/scraper.go index 1a8eeaf3..b8ff5d4a 100644 --- a/pkg/database/scraper/gamelistxml/scraper.go +++ b/pkg/database/scraper/gamelistxml/scraper.go @@ -532,33 +532,45 @@ func (g *GamelistXMLScraper) scrapeLoop( const id = "gamelist.xml" var totalProcessed, totalMatched, totalSkipped int + totalSteps := len(systems) - waitForResume := func(systemID string, processed, matched, skipped int) bool { + waitForResume := func(systemID string, currentStep, processed, matched, skipped int) bool { if waitErr := opts.Pauser.Wait(ctx); waitErr != nil { ch <- scraper.ScrapeUpdate{ - SystemID: systemID, - Processed: processed, - Matched: matched, - Skipped: skipped, - Done: true, + SystemID: systemID, + Processed: processed, + Matched: matched, + Skipped: skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, + Done: true, } return false } return true } - for _, system := range systems { - if !waitForResume(system.ID, 0, 0, 0) { + for i, system := range systems { + currentStep := i + 1 + sendUpdate := func(update scraper.ScrapeUpdate) { + update.TotalSteps = totalSteps + update.CurrentStep = currentStep + ch <- update + } + if !waitForResume(system.ID, currentStep, totalProcessed, totalMatched, totalSkipped) { return } select { case <-ctx.Done(): ch <- scraper.ScrapeUpdate{ - Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped, + Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped, + TotalSteps: totalSteps, CurrentStep: currentStep, Done: true, } return - case ch <- scraper.ScrapeUpdate{SystemID: system.ID, Total: 0}: + case ch <- scraper.ScrapeUpdate{ + SystemID: system.ID, Total: 0, TotalSteps: totalSteps, CurrentStep: currentStep, + }: } titlesBySlug := make(map[string]database.MediaTitle) @@ -567,10 +579,10 @@ func (g *GamelistXMLScraper) scrapeLoop( allTitles, titlesErr := mdb.GetTitlesBySystemID(system.ID) if titlesErr != nil { if errors.Is(titlesErr, context.Canceled) || errors.Is(titlesErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true}) return } for _, t := range allTitles { @@ -586,10 +598,10 @@ func (g *GamelistXMLScraper) scrapeLoop( unscraped, titlesErr := mdb.FindMediaTitlesWithoutSentinel(ctx, system.DBID, sentinelTag) if titlesErr != nil { if errors.Is(titlesErr, context.Canceled) || errors.Is(titlesErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true}) return } for _, t := range unscraped { @@ -598,10 +610,10 @@ func (g *GamelistXMLScraper) scrapeLoop( allTitles, allTitlesErr := mdb.GetTitlesBySystemID(system.ID) if allTitlesErr != nil { if errors.Is(allTitlesErr, context.Canceled) || errors.Is(allTitlesErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: allTitlesErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: allTitlesErr, Done: true}) return } for _, t := range allTitles { @@ -614,10 +626,10 @@ func (g *GamelistXMLScraper) scrapeLoop( allMedia, mediaErr := mdb.GetMediaBySystemID(system.ID) if mediaErr != nil { if errors.Is(mediaErr, context.Canceled) || errors.Is(mediaErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: mediaErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: mediaErr, Done: true}) return } scrapedIDs := map[int64]struct{}{} @@ -626,10 +638,10 @@ func (g *GamelistXMLScraper) scrapeLoop( scrapedIDs, scrapedErr = mdb.GetScrapedMediaIDs(ctx, id, system.DBID) if scrapedErr != nil { if errors.Is(scrapedErr, context.Canceled) || errors.Is(scrapedErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: scrapedErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: scrapedErr, Done: true}) return } } @@ -660,26 +672,30 @@ func (g *GamelistXMLScraper) scrapeLoop( parsed, parseErr := g.loadParsedGamelistSystem(ctx, system) if parseErr != nil { if errors.Is(parseErr, context.Canceled) || errors.Is(parseErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true}) return } - ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: parseErr, Done: true} + sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: parseErr, Done: true}) return } - companion := g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch) - if !waitForResume(system.ID, companion.Processed, companion.Matched, companion.Skipped) { + companion := g.processCompanionEntriesFromParsed( + ctx, opts, system, mdb, indexes, parsed, ch, totalSteps, currentStep, + ) + if !waitForResume(system.ID, currentStep, companion.Processed, companion.Matched, companion.Skipped) { return } if len(indexes.TitlesBySlug) == 0 && len(indexes.MediaByPathFold) == 0 { if companion.Processed > 0 { ch <- scraper.ScrapeUpdate{ - SystemID: system.ID, - Total: companion.Processed, - Processed: companion.Processed, - Matched: companion.Matched, - Skipped: companion.Skipped, + SystemID: system.ID, + Total: companion.Processed, + Processed: companion.Processed, + Matched: companion.Matched, + Skipped: companion.Skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, } } totalProcessed += companion.Processed @@ -691,16 +707,16 @@ func (g *GamelistXMLScraper) scrapeLoop( records, loadErr := g.loadRecordsFromParsed(ctx, system, indexes, parsed) if loadErr != nil { if errors.Is(loadErr, context.Canceled) || errors.Is(loadErr, context.DeadlineExceeded) { - ch <- scraper.ScrapeUpdate{ + sendUpdate(scraper.ScrapeUpdate{ SystemID: system.ID, Done: true, Processed: companion.Processed, Matched: companion.Matched, Skipped: companion.Skipped, - } + }) return } - ch <- scraper.ScrapeUpdate{ + sendUpdate(scraper.ScrapeUpdate{ SystemID: system.ID, FatalErr: loadErr, Done: true, Processed: companion.Processed, Matched: companion.Matched, Skipped: companion.Skipped, - } + }) return } @@ -712,19 +728,23 @@ func (g *GamelistXMLScraper) scrapeLoop( select { case <-ctx.Done(): ch <- scraper.ScrapeUpdate{ - Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped, + Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped, + TotalSteps: totalSteps, CurrentStep: currentStep, Done: true, } return case ch <- scraper.ScrapeUpdate{ - SystemID: system.ID, - Total: systemTotal, - Processed: companion.Processed, - Matched: companion.Matched, - Skipped: companion.Skipped, + SystemID: system.ID, + Total: systemTotal, + Processed: companion.Processed, + Matched: companion.Matched, + Skipped: companion.Skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, }: } if !waitForResume( system.ID, + currentStep, companion.Processed+processed, companion.Matched+matched, companion.Skipped+skipped, @@ -742,14 +762,18 @@ func (g *GamelistXMLScraper) scrapeLoop( update.Processed += companion.Processed update.Matched += companion.Matched update.Skipped += companion.Skipped + update.TotalSteps = totalSteps + update.CurrentStep = currentStep select { case <-ctx.Done(): ch <- scraper.ScrapeUpdate{ - SystemID: system.ID, - Processed: companion.Processed + processed, - Matched: companion.Matched + matched, - Skipped: companion.Skipped + skipped, - Done: true, + SystemID: system.ID, + Processed: companion.Processed + processed, + Matched: companion.Matched + matched, + Skipped: companion.Skipped + skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, + Done: true, } return false case ch <- update: @@ -761,6 +785,7 @@ func (g *GamelistXMLScraper) scrapeLoop( for _, record := range records { if !waitForResume( system.ID, + currentStep, companion.Processed+processed, companion.Matched+matched, companion.Skipped+skipped, @@ -770,11 +795,13 @@ func (g *GamelistXMLScraper) scrapeLoop( select { case <-ctx.Done(): ch <- scraper.ScrapeUpdate{ - SystemID: system.ID, - Processed: companion.Processed + processed, - Matched: companion.Matched + matched, - Skipped: companion.Skipped + skipped, - Done: true, + SystemID: system.ID, + Processed: companion.Processed + processed, + Matched: companion.Matched + matched, + Skipped: companion.Skipped + skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, + Done: true, } return default: @@ -809,6 +836,7 @@ func (g *GamelistXMLScraper) scrapeLoop( } if !waitForResume( system.ID, + currentStep, companion.Processed+processed, companion.Matched+matched, companion.Skipped+skipped, @@ -860,7 +888,10 @@ func (g *GamelistXMLScraper) scrapeLoop( totalSkipped += companion.Skipped + skipped } - ch <- scraper.ScrapeUpdate{Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped} + ch <- scraper.ScrapeUpdate{ + Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped, + TotalSteps: totalSteps, CurrentStep: totalSteps, + } } // MapToDB converts a GamelistRecord into the tag and property writes to apply @@ -1595,7 +1626,7 @@ func (g *GamelistXMLScraper) processCompanionEntries( if err != nil { return companionStats{} } - return g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch) + return g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch, 0, 0) } func (g *GamelistXMLScraper) processCompanionEntriesFromParsed( @@ -1606,6 +1637,8 @@ func (g *GamelistXMLScraper) processCompanionEntriesFromParsed( indexes loadRecordIndexes, parsed parsedGamelistSystem, ch chan<- scraper.ScrapeUpdate, + totalSteps int, + currentStep int, ) companionStats { parents, children := companionEntriesFromParsed(ctx, system, parsed) if len(parents) == 0 && len(children) == 0 { @@ -1642,11 +1675,13 @@ func (g *GamelistXMLScraper) processCompanionEntriesFromParsed( case <-ctx.Done(): return false case ch <- scraper.ScrapeUpdate{ - SystemID: system.ID, - Total: len(children), - Processed: stats.Processed, - Matched: stats.Matched, - Skipped: stats.Skipped, + SystemID: system.ID, + Total: len(children), + Processed: stats.Processed, + Matched: stats.Matched, + Skipped: stats.Skipped, + TotalSteps: totalSteps, + CurrentStep: currentStep, }: lastProgress = time.Now() return true diff --git a/pkg/database/scraper/gamelistxml/scraper_test.go b/pkg/database/scraper/gamelistxml/scraper_test.go index f767a577..6c684fa2 100644 --- a/pkg/database/scraper/gamelistxml/scraper_test.go +++ b/pkg/database/scraper/gamelistxml/scraper_test.go @@ -2647,6 +2647,68 @@ func TestScrapeLoop_ProgressIsPerSystem(t *testing.T) { mockDB.AssertExpectations(t) } +func TestScrapeLoop_PauseCancelBeforeNextSystemPreservesProgress(t *testing.T) { + t.Parallel() + + root1 := t.TempDir() + root2 := t.TempDir() + recordPath := filepath.Join(root1, "first.nes") + require.NoError(t, os.WriteFile(filepath.Join(root1, "gamelist.xml"), []byte(` + + ./first.nesFirst +`), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(root2, "gamelist.xml"), []byte(` + + ./second.sfcSecond +`), 0o600)) + + pauser := syncutil.NewPauser() + paused := make(chan struct{}) + mockDB := helpers.NewMockMediaDBI() + mockDB.On("GetTitlesBySystemID", "nes").Return([]database.TitleWithSystem{{ + DBID: 1, SystemDBID: 10, Slug: "first", Name: "First", + }}, nil) + mockDB.On("GetMediaBySystemID", "nes").Return([]database.MediaWithFullPath{{ + DBID: 11, MediaTitleDBID: 1, Path: recordPath, + }}, nil) + mockDB.On("ApplyScrapeResult", mock.Anything, int64(11), int64(1), mock.Anything). + Run(func(_ mock.Arguments) { + pauser.Pause() + close(paused) + }). + Return(nil) + + s := &GamelistXMLScraper{db: mockDB} + ch := make(chan scraper.ScrapeUpdate, 128) + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + go func() { + defer close(done) + s.scrapeLoop(ctx, scraper.ScrapeOptions{Pauser: pauser, Force: true}, []scraper.ScrapeSystem{ + {ID: "nes", ROMPaths: []string{root1}, DBID: 10}, + {ID: "snes", ROMPaths: []string{root2}, DBID: 20}, + }, mockDB, ch) + }() + + <-paused + cancel() + <-done + + updates := drainChannel(ch) + var doneUpdate scraper.ScrapeUpdate + for _, update := range updates { + if update.Done { + doneUpdate = update + } + } + require.True(t, doneUpdate.Done) + assert.Equal(t, "snes", doneUpdate.SystemID) + assert.Equal(t, 1, doneUpdate.Processed) + assert.Equal(t, 1, doneUpdate.Matched) + assert.Equal(t, 0, doneUpdate.Skipped) + mockDB.AssertExpectations(t) +} + func TestScrapeLoop_ProgressIncludesCompanionBaseline(t *testing.T) { t.Parallel() diff --git a/pkg/database/scraper/scraper.go b/pkg/database/scraper/scraper.go index ad2b6ad6..44199896 100644 --- a/pkg/database/scraper/scraper.go +++ b/pkg/database/scraper/scraper.go @@ -54,14 +54,16 @@ type ScrapeOptions struct { // ScrapeUpdate is one progress event emitted on the channel returned by Scrape. type ScrapeUpdate struct { - Err error - FatalErr error - SystemID string - Processed int - Total int - Matched int - Skipped int - Done bool + Err error + FatalErr error + SystemID string + Processed int + Total int + Matched int + Skipped int + TotalSteps int + CurrentStep int + Done bool } // MatchResult is the output of a successful Match call. Both IDs must be