diff --git a/docs/api/methods.md b/docs/api/methods.md
index 780e800d..32660b37 100644
--- a/docs/api/methods.md
+++ b/docs/api/methods.md
@@ -1593,7 +1593,7 @@ Returns `null` on success. The scraper continues after the response is sent.
Return the latest known metadata scraper status.
-This method behaves like `media` does for indexing status: clients can query the current scrape snapshot after opening a UI, then continue listening for `media.scraping` notifications. If no scrape has run since startup, the result is idle with `scraping: false` and `done: false`.
+This method behaves like `media` does for indexing status: clients can query the current scrape snapshot after opening a UI, then continue listening for `media.scraping` notifications. If no scrape has run since startup, the result is idle with `scraping: false`, `done: false`, and `state: "idle"`. Existing flat counter fields remain for compatibility; new UIs should prefer `currentSystem` for per-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run progress.
#### Parameters
@@ -1613,6 +1613,12 @@ None.
| scraping | boolean | Yes | Whether a scrape is currently running. |
| done | boolean | Yes | Whether the latest scrape reached a terminal state. |
| paused | boolean | Yes | Whether the active scrape is paused because media is running or until resumed. |
+| state | string | No | Explicit lifecycle state: `idle`, `running`, `paused`, `completed`, `cancelled`, or `failed`. |
+| error | string | No | Fatal scrape error on failed terminal updates. |
+| totalSteps | integer | No | Total systems in the scrape run, when known. |
+| currentStep | integer | No | 1-based current system step, when known. |
+| currentStepDisplay | string | No | Display name for the current system step, falling back to system ID. |
+| currentSystem | object | No | Per-system progress object with `systemId`, `systemName`, `processed`, `total`, `matched`, and `skipped`. |
#### Example
@@ -1642,7 +1648,19 @@ None.
"totalScraped": 1200,
"scraping": true,
"done": false,
- "paused": false
+ "paused": false,
+ "state": "running",
+ "totalSteps": 2,
+ "currentStep": 1,
+ "currentStepDisplay": "Super Nintendo Entertainment System",
+ "currentSystem": {
+ "systemId": "snes",
+ "systemName": "Super Nintendo Entertainment System",
+ "processed": 42,
+ "total": 100,
+ "matched": 38,
+ "skipped": 4
+ }
}
}
```
diff --git a/docs/api/notifications.md b/docs/api/notifications.md
index 57e5a0b6..06ade847 100644
--- a/docs/api/notifications.md
+++ b/docs/api/notifications.md
@@ -217,7 +217,7 @@ Sent during media database generation to indicate indexing progress and completi
Sent while a metadata scraper run is active and when it completes.
-The first notification for a scraper run identifies the scraper and sets `scraping` to true. Progress notifications include the current system, counters, pause state, and completion state. A final notification has `scraping` set to false and `done` set to true.
+The first notification for a scraper run identifies the scraper and sets `scraping` to true. Progress notifications include the current system, per-system counters, whole-run system-step progress, pause state, and completion state. A final notification has `scraping` set to false and `done` set to true. Existing flat counter fields remain for compatibility; new UIs should prefer `currentSystem` for per-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run progress.
#### Parameters
@@ -233,6 +233,12 @@ The first notification for a scraper run identifies the scraper and sets `scrapi
| scraping | boolean | Yes | True while scraping is active. |
| done | boolean | Yes | True on the terminal update for the scraper run. |
| paused | boolean | Yes | True when the active scrape is paused. |
+| state | string | No | Explicit lifecycle state: `idle`, `running`, `paused`, `completed`, `cancelled`, or `failed`. |
+| error | string | No | Fatal scrape error on failed terminal updates. |
+| totalSteps | number | No | Total systems in the scrape run, when known. |
+| currentStep | number | No | 1-based current system step, when known. |
+| currentStepDisplay | string | No | Display name for the current system step, falling back to system ID. |
+| currentSystem | object | No | Per-system progress object with `systemId`, `systemName`, `processed`, `total`, `matched`, and `skipped`. |
#### Examples
@@ -252,7 +258,19 @@ The first notification for a scraper run identifies the scraper and sets `scrapi
"totalScraped": 1200,
"scraping": true,
"done": false,
- "paused": false
+ "paused": false,
+ "state": "running",
+ "totalSteps": 12,
+ "currentStep": 3,
+ "currentStepDisplay": "Super Nintendo Entertainment System",
+ "currentSystem": {
+ "systemId": "SNES",
+ "systemName": "Super Nintendo Entertainment System",
+ "processed": 42,
+ "total": 100,
+ "matched": 38,
+ "skipped": 4
+ }
}
}
```
@@ -273,7 +291,19 @@ The first notification for a scraper run identifies the scraper and sets `scrapi
"totalScraped": 1250,
"scraping": false,
"done": true,
- "paused": false
+ "paused": false,
+ "state": "completed",
+ "totalSteps": 12,
+ "currentStep": 12,
+ "currentStepDisplay": "Super Nintendo Entertainment System",
+ "currentSystem": {
+ "systemId": "SNES",
+ "systemName": "Super Nintendo Entertainment System",
+ "processed": 100,
+ "total": 100,
+ "matched": 92,
+ "skipped": 8
+ }
}
}
```
diff --git a/docs/scraper.md b/docs/scraper.md
index f30af6a5..e1b29b9f 100644
--- a/docs/scraper.md
+++ b/docs/scraper.md
@@ -197,11 +197,23 @@ Progress is queryable with `media.scrape.status` and broadcast as `media.scrapin
"totalScraped": 1000,
"scraping": true,
"done": false,
- "paused": false
+ "paused": false,
+ "state": "running",
+ "totalSteps": 2,
+ "currentStep": 1,
+ "currentStepDisplay": "Super Nintendo Entertainment System",
+ "currentSystem": {
+ "systemId": "snes",
+ "systemName": "Super Nintendo Entertainment System",
+ "processed": 42,
+ "total": 100,
+ "matched": 38,
+ "skipped": 4
+ }
}
```
-`totalScraped` is derived from scraper sentinel tags in the database, not from the current run's `matched` count.
+`totalScraped` is derived from scraper sentinel tags in the database, not from the current run's `matched` count. Existing flat fields stay for compatibility; new UIs should use `currentSystem` for current-system progress and `totalSteps`/`currentStep`/`currentStepDisplay` for whole-run system-step progress.
Only one scraper can run at a time, and scraping is mutually exclusive with media indexing.
diff --git a/pkg/api/methods/media_scrape.go b/pkg/api/methods/media_scrape.go
index 5e40c7c6..085424d2 100644
--- a/pkg/api/methods/media_scrape.go
+++ b/pkg/api/methods/media_scrape.go
@@ -28,6 +28,7 @@ import (
"github.com/ZaparooProject/zaparoo-core/v2/pkg/api/models/requests"
"github.com/ZaparooProject/zaparoo-core/v2/pkg/api/notifications"
"github.com/ZaparooProject/zaparoo-core/v2/pkg/api/validation"
+ "github.com/ZaparooProject/zaparoo-core/v2/pkg/assets"
"github.com/ZaparooProject/zaparoo-core/v2/pkg/database"
"github.com/ZaparooProject/zaparoo-core/v2/pkg/database/scraper"
"github.com/ZaparooProject/zaparoo-core/v2/pkg/helpers/syncutil"
@@ -38,7 +39,15 @@ import (
// scrapingStatus tracks the lifecycle of an active media.scrape operation.
// It mirrors the indexingStatus pattern in media.go for consistent state
// management and safe concurrent access.
-const scrapeTotalScrapedRefreshInterval = 5 * time.Second
+const (
+ scrapeTotalScrapedRefreshInterval = 5 * time.Second
+ scrapeStateIdle = "idle"
+ scrapeStateRunning = "running"
+ scrapeStatePaused = "paused"
+ scrapeStateCompleted = "completed"
+ scrapeStateCancelled = "cancelled"
+ scrapeStateFailed = "failed"
+)
type scrapedCountCache struct {
lastRefresh time.Time
@@ -67,6 +76,7 @@ func (s *scrapingStatus) startIfNotRunning(scraperID string) bool {
s.countCache = scrapedCountCache{}
s.latest = models.ScrapingStatusResponse{
ScraperID: scraperID,
+ State: scrapeStateRunning,
Scraping: true,
}
return true
@@ -122,6 +132,7 @@ func (s *scrapingStatus) cancel() bool {
s.latest.Scraping = false
s.latest.Done = true
s.latest.Paused = false
+ s.latest.State = scrapeStateCancelled
// Do NOT clear running/scraperID here. The goroutine's deferred
// clearIfOwner call is the single writer for those fields, preventing
// a new scrape from starting only to have its state cleared by the
@@ -243,10 +254,92 @@ func queryScrapedMediaCount(ctx context.Context, db *database.Database, scraperI
return count, true
}
+func systemProgressDisplay(systemID string) string {
+ if systemID == "" {
+ return ""
+ }
+ md, err := assets.GetSystemMetadata(systemID)
+ if err != nil || md.Name == "" {
+ return systemID
+ }
+ return md.Name
+}
+
+func ptrIfPositive(v int) *int {
+ if v <= 0 {
+ return nil
+ }
+ return &v
+}
+
+func ptrIfNotEmpty(v string) *string {
+ if v == "" {
+ return nil
+ }
+ return &v
+}
+
+func scrapeState(scrapeCtx context.Context, update *scraper.ScrapeUpdate, paused bool) string {
+ switch {
+ case update.FatalErr != nil:
+ return scrapeStateFailed
+ case update.Done && scrapeCtx != nil && scrapeCtx.Err() != nil:
+ return scrapeStateCancelled
+ case update.Done:
+ return scrapeStateCompleted
+ case paused:
+ return scrapeStatePaused
+ default:
+ return scrapeStateRunning
+ }
+}
+
+func scrapingStatusFromUpdate(
+ scrapeCtx context.Context,
+ scraperID string,
+ update *scraper.ScrapeUpdate,
+ paused bool,
+) models.ScrapingStatusResponse {
+ display := systemProgressDisplay(update.SystemID)
+ status := models.ScrapingStatusResponse{
+ ScraperID: scraperID,
+ SystemID: update.SystemID,
+ Processed: update.Processed,
+ Total: update.Total,
+ Matched: update.Matched,
+ Skipped: update.Skipped,
+ Scraping: !update.Done,
+ Done: update.Done,
+ Paused: paused && !update.Done,
+ State: scrapeState(scrapeCtx, update, paused && !update.Done),
+ TotalSteps: ptrIfPositive(update.TotalSteps),
+ CurrentStep: ptrIfPositive(update.CurrentStep),
+ CurrentStepDisplay: ptrIfNotEmpty(display),
+ }
+ if update.FatalErr != nil {
+ status.Error = update.FatalErr.Error()
+ }
+ if update.SystemID != "" {
+ status.CurrentSystem = &models.ScrapeSystemProgressResponse{
+ SystemID: update.SystemID,
+ SystemName: display,
+ Processed: update.Processed,
+ Total: update.Total,
+ Matched: update.Matched,
+ Skipped: update.Skipped,
+ }
+ }
+ return status
+}
+
func PublishScrapePauseStatus(ns chan<- models.Notification, paused bool) {
status := scrapingStatusInstance.getLatest()
status.Scraping = true
status.Paused = paused
+ status.State = scrapeStateRunning
+ if paused {
+ status.State = scrapeStatePaused
+ }
publishScrapingStatus(ns, &status)
}
@@ -300,8 +393,13 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic
ns := env.State.Notifications
db := env.Database
+ initialState := scrapeStateRunning
+ if paused {
+ initialState = scrapeStatePaused
+ }
initialStatus := models.ScrapingStatusResponse{
ScraperID: params.ScraperID,
+ State: initialState,
Scraping: true,
Paused: paused,
}
@@ -320,17 +418,8 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic
if update.Done {
receivedDone = true
}
- status := models.ScrapingStatusResponse{
- ScraperID: scraperID,
- SystemID: update.SystemID,
- Processed: update.Processed,
- Total: update.Total,
- Matched: update.Matched,
- Skipped: update.Skipped,
- Scraping: !update.Done,
- Done: update.Done,
- Paused: env.ScrapePauser != nil && env.ScrapePauser.IsPaused() && !update.Done,
- }
+ paused := env.ScrapePauser != nil && env.ScrapePauser.IsPaused()
+ status := scrapingStatusFromUpdate(scrapeCtx, scraperID, &update, paused)
if update.Done {
populateScrapedMediaCountExact(env.State.GetContext(), db, &status)
} else {
@@ -344,21 +433,14 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic
// Otherwise the channel already delivered the final counters and sending
// another zeroed-out Done would overwrite them for consumers.
if !receivedDone {
- status := scrapingStatusInstance.getLatest()
- status.ScraperID = scraperID
- status.Scraping = false
- status.Done = true
- status.Paused = false
- terminalStatus := models.ScrapingStatusResponse{
- ScraperID: status.ScraperID,
- SystemID: status.SystemID,
- Processed: status.Processed,
- Total: status.Total,
- Matched: status.Matched,
- Skipped: status.Skipped,
- Scraping: status.Scraping,
- Done: status.Done,
- Paused: status.Paused,
+ terminalStatus := scrapingStatusInstance.getLatest()
+ terminalStatus.ScraperID = scraperID
+ terminalStatus.Scraping = false
+ terminalStatus.Done = true
+ terminalStatus.Paused = false
+ terminalStatus.State = scrapeStateCompleted
+ if scrapeCtx.Err() != nil {
+ terminalStatus.State = scrapeStateCancelled
}
populateScrapedMediaCountExact(env.State.GetContext(), db, &terminalStatus)
publishScrapingStatus(ns, &terminalStatus)
@@ -374,8 +456,18 @@ func HandleMediaScrape(env requests.RequestEnv) (any, error) { //nolint:gocritic
//nolint:gocritic // API handler signature; large env param cannot be passed by pointer
func HandleMediaScrapeStatus(env requests.RequestEnv) (any, error) {
status := scrapingStatusInstance.getLatest()
+ if status.State == "" {
+ status.State = scrapeStateIdle
+ if status.Scraping {
+ status.State = scrapeStateRunning
+ }
+ }
if status.Scraping && env.ScrapePauser != nil {
status.Paused = env.ScrapePauser.IsPaused()
+ status.State = scrapeStateRunning
+ if status.Paused {
+ status.State = scrapeStatePaused
+ }
}
if env.Database == nil || env.Database.MediaDB == nil {
return status, nil
diff --git a/pkg/api/methods/media_scrape_test.go b/pkg/api/methods/media_scrape_test.go
index 666fd556..c7a98a4b 100644
--- a/pkg/api/methods/media_scrape_test.go
+++ b/pkg/api/methods/media_scrape_test.go
@@ -343,6 +343,7 @@ func TestHandleMediaScrapeStatus_TracksLatestProgress(t *testing.T) {
assert.Equal(t, models.ScrapingStatusResponse{
ScraperID: "test-scraper",
SystemID: "SNES",
+ State: "running",
Processed: 42,
Total: 100,
Matched: 38,
@@ -484,6 +485,7 @@ func TestHandleMediaScrapeCancel_CancelsActive(t *testing.T) {
require.True(t, ok)
assert.False(t, status.Scraping)
assert.True(t, status.Done)
+ assert.Equal(t, "cancelled", status.State)
}
func TestHandleMediaScrapeResume_ResumesPausedScrape(t *testing.T) {
@@ -518,6 +520,7 @@ func TestHandleMediaScrapeResume_ResumesPausedScrape(t *testing.T) {
require.NoError(t, json.Unmarshal(notif.Params, &payload))
assert.True(t, payload.Scraping)
assert.False(t, payload.Paused)
+ assert.Equal(t, "running", payload.State)
}
func TestHandleMediaScrapeResume_NilPauser(t *testing.T) {
@@ -645,6 +648,72 @@ func TestHandleMediaScrape_CachesProgressScrapedCountAndRefreshesDone(t *testing
mockDB.AssertExpectations(t)
}
+func TestHandleMediaScrape_EmitsFatalStatus(t *testing.T) {
+ // Not parallel — manipulates shared scrapingStatusInstance.
+ ClearScrapingStatus()
+ statusInstance.clear()
+
+ mockDB := testhelpers.NewMockMediaDBI()
+ mockDB.On("TrackBackgroundOperation").Return()
+ mockDB.On("BackgroundOperationDone").Return()
+ mockDB.On("GetScrapedMediaCount", assertmock.Anything, "fatal-scraper").Return(0, nil)
+
+ scrapeErr := errors.New("parse failed")
+ fatalScraper := platforms.Scraper{
+ ID: "fatal-scraper",
+ Name: "Fatal Scraper",
+ Scrape: func(
+ _ context.Context, _ *config.Instance, _ platforms.Platform,
+ _ afero.Fs, _ *database.Database, _ scraper.ScrapeOptions,
+ _ platforms.ScraperCustomOptions, ch chan<- scraper.ScrapeUpdate,
+ ) error {
+ go func() {
+ defer close(ch)
+ ch <- scraper.ScrapeUpdate{
+ SystemID: "SNES", FatalErr: scrapeErr, Done: true, TotalSteps: 2, CurrentStep: 1,
+ }
+ }()
+ return nil
+ },
+ }
+
+ pl := mocks.NewMockPlatform()
+ pl.On("Scrapers", assertmock.Anything).Return(map[string]platforms.Scraper{"fatal-scraper": fatalScraper})
+ pl.SetupBasicMock()
+ st, ns := state.NewState(pl, "test")
+ t.Cleanup(st.StopService)
+
+ _, err := HandleMediaScrape(requests.RequestEnv{
+ Context: context.Background(),
+ Platform: pl,
+ State: st,
+ Database: &database.Database{MediaDB: mockDB},
+ Params: json.RawMessage(`{"scraperId":"fatal-scraper"}`),
+ })
+ require.NoError(t, err)
+
+ var gotFatal bool
+ timeout := time.After(2 * time.Second)
+ for !gotFatal {
+ select {
+ case n := <-ns:
+ if n.Method != models.NotificationMediaScraping {
+ continue
+ }
+ var p models.ScrapingStatusResponse
+ require.NoError(t, json.Unmarshal(n.Params, &p))
+ if p.Done {
+ assert.Equal(t, "failed", p.State)
+ assert.Equal(t, "parse failed", p.Error)
+ gotFatal = true
+ }
+ case <-timeout:
+ t.Fatal("timed out waiting for fatal media.scraping notification")
+ }
+ }
+ mockDB.AssertExpectations(t)
+}
+
func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) {
// Not parallel — manipulates shared scrapingStatusInstance.
ClearScrapingStatus()
@@ -665,9 +734,13 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) {
) error {
go func() {
defer close(ch)
- ch <- scraper.ScrapeUpdate{SystemID: "SNES", Total: 10, Processed: 5, Matched: 4, Skipped: 1}
ch <- scraper.ScrapeUpdate{
- SystemID: "SNES", Total: 10, Processed: 10, Matched: 8, Skipped: 2, Done: true,
+ SystemID: "SNES", Total: 10, Processed: 5, Matched: 4, Skipped: 1,
+ TotalSteps: 2, CurrentStep: 1,
+ }
+ ch <- scraper.ScrapeUpdate{
+ SystemID: "SNES", Total: 10, Processed: 10, Matched: 8, Skipped: 2,
+ TotalSteps: 2, CurrentStep: 1, Done: true,
}
}()
return nil
@@ -696,6 +769,7 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) {
var gotDone bool
var maxProcessed int
+ var sawCurrentSystem bool
timeout := time.After(2 * time.Second)
for !gotDone {
select {
@@ -708,6 +782,17 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) {
if p.Processed > maxProcessed {
maxProcessed = p.Processed
}
+ if p.CurrentSystem != nil && !p.Done {
+ sawCurrentSystem = true
+ assert.Equal(t, "SNES", p.CurrentSystem.SystemID)
+ assert.Equal(t, p.Processed, p.CurrentSystem.Processed)
+ assert.Equal(t, p.Total, p.CurrentSystem.Total)
+ require.NotNil(t, p.TotalSteps)
+ require.NotNil(t, p.CurrentStep)
+ assert.Equal(t, 2, *p.TotalSteps)
+ assert.Equal(t, 1, *p.CurrentStep)
+ assert.Equal(t, "running", p.State)
+ }
if p.Done {
gotDone = true
}
@@ -717,6 +802,7 @@ func TestHandleMediaScrape_EmitsProgressUpdates(t *testing.T) {
}
assert.Equal(t, 10, maxProcessed, "progress updates should reflect actual processed count")
+ assert.True(t, sawCurrentSystem, "progress updates should expose currentSystem and step fields")
require.Eventually(t, func() bool {
return !IsScrapingRunning()
}, 2*time.Second, 10*time.Millisecond)
diff --git a/pkg/api/models/responses.go b/pkg/api/models/responses.go
index 1c90730b..cd331d34 100644
--- a/pkg/api/models/responses.go
+++ b/pkg/api/models/responses.go
@@ -290,19 +290,34 @@ type MediaImageResponse struct {
TypeTag string `json:"typeTag"` // e.g. "property:image-boxart"
}
+type ScrapeSystemProgressResponse struct {
+ SystemID string `json:"systemId"`
+ SystemName string `json:"systemName,omitempty"`
+ Processed int `json:"processed"`
+ Total int `json:"total"`
+ Matched int `json:"matched"`
+ Skipped int `json:"skipped"`
+}
+
// ScrapingStatusResponse is broadcast as a "media.scraping" notification for
// each ScrapeUpdate received from the scraper and on completion/cancellation.
type ScrapingStatusResponse struct {
- ScraperID string `json:"scraperId,omitempty"`
- SystemID string `json:"systemId,omitempty"`
- Processed int `json:"processed"`
- Total int `json:"total"`
- Matched int `json:"matched"`
- Skipped int `json:"skipped"`
- TotalScraped int `json:"totalScraped"`
- Scraping bool `json:"scraping"`
- Done bool `json:"done"`
- Paused bool `json:"paused"`
+ CurrentStep *int `json:"currentStep,omitempty"`
+ CurrentStepDisplay *string `json:"currentStepDisplay,omitempty"`
+ TotalSteps *int `json:"totalSteps,omitempty"`
+ CurrentSystem *ScrapeSystemProgressResponse `json:"currentSystem,omitempty"`
+ ScraperID string `json:"scraperId,omitempty"`
+ SystemID string `json:"systemId,omitempty"`
+ State string `json:"state,omitempty"`
+ Error string `json:"error,omitempty"`
+ Processed int `json:"processed"`
+ Total int `json:"total"`
+ Matched int `json:"matched"`
+ Skipped int `json:"skipped"`
+ TotalScraped int `json:"totalScraped"`
+ Scraping bool `json:"scraping"`
+ Done bool `json:"done"`
+ Paused bool `json:"paused"`
}
type MediaLookupMatch struct {
diff --git a/pkg/api/notifications/notifications_test.go b/pkg/api/notifications/notifications_test.go
index 5b631567..0510f54e 100644
--- a/pkg/api/notifications/notifications_test.go
+++ b/pkg/api/notifications/notifications_test.go
@@ -252,14 +252,29 @@ func TestMediaScraping_Payload(t *testing.T) {
ns := make(chan models.Notification, 1)
+ totalSteps := 5
+ currentStep := 2
+ currentStepDisplay := "Nintendo Entertainment System"
payload := models.ScrapingStatusResponse{
- ScraperID: "gamelist.xml",
- SystemID: "nes",
- Processed: 42,
- Total: 100,
- Matched: 38,
- Skipped: 4,
- Scraping: true,
+ ScraperID: "gamelist.xml",
+ SystemID: "nes",
+ State: "running",
+ Processed: 42,
+ Total: 100,
+ Matched: 38,
+ Skipped: 4,
+ Scraping: true,
+ TotalSteps: &totalSteps,
+ CurrentStep: ¤tStep,
+ CurrentStepDisplay: ¤tStepDisplay,
+ CurrentSystem: &models.ScrapeSystemProgressResponse{
+ SystemID: "nes",
+ SystemName: currentStepDisplay,
+ Processed: 42,
+ Total: 100,
+ Matched: 38,
+ Skipped: 4,
+ },
}
MediaScraping(ns, &payload)
@@ -276,6 +291,15 @@ func TestMediaScraping_Payload(t *testing.T) {
assert.Equal(t, payload.Total, received.Total)
assert.Equal(t, payload.Matched, received.Matched)
assert.Equal(t, payload.Skipped, received.Skipped)
+ assert.Equal(t, payload.State, received.State)
+ require.NotNil(t, received.TotalSteps)
+ require.NotNil(t, received.CurrentStep)
+ require.NotNil(t, received.CurrentStepDisplay)
+ require.NotNil(t, received.CurrentSystem)
+ assert.Equal(t, totalSteps, *received.TotalSteps)
+ assert.Equal(t, currentStep, *received.CurrentStep)
+ assert.Equal(t, currentStepDisplay, *received.CurrentStepDisplay)
+ assert.Equal(t, "nes", received.CurrentSystem.SystemID)
assert.True(t, received.Scraping)
}
diff --git a/pkg/database/scraper/gamelistxml/scraper.go b/pkg/database/scraper/gamelistxml/scraper.go
index 1a8eeaf3..b8ff5d4a 100644
--- a/pkg/database/scraper/gamelistxml/scraper.go
+++ b/pkg/database/scraper/gamelistxml/scraper.go
@@ -532,33 +532,45 @@ func (g *GamelistXMLScraper) scrapeLoop(
const id = "gamelist.xml"
var totalProcessed, totalMatched, totalSkipped int
+ totalSteps := len(systems)
- waitForResume := func(systemID string, processed, matched, skipped int) bool {
+ waitForResume := func(systemID string, currentStep, processed, matched, skipped int) bool {
if waitErr := opts.Pauser.Wait(ctx); waitErr != nil {
ch <- scraper.ScrapeUpdate{
- SystemID: systemID,
- Processed: processed,
- Matched: matched,
- Skipped: skipped,
- Done: true,
+ SystemID: systemID,
+ Processed: processed,
+ Matched: matched,
+ Skipped: skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
+ Done: true,
}
return false
}
return true
}
- for _, system := range systems {
- if !waitForResume(system.ID, 0, 0, 0) {
+ for i, system := range systems {
+ currentStep := i + 1
+ sendUpdate := func(update scraper.ScrapeUpdate) {
+ update.TotalSteps = totalSteps
+ update.CurrentStep = currentStep
+ ch <- update
+ }
+ if !waitForResume(system.ID, currentStep, totalProcessed, totalMatched, totalSkipped) {
return
}
select {
case <-ctx.Done():
ch <- scraper.ScrapeUpdate{
- Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped,
+ Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped,
+ TotalSteps: totalSteps, CurrentStep: currentStep, Done: true,
}
return
- case ch <- scraper.ScrapeUpdate{SystemID: system.ID, Total: 0}:
+ case ch <- scraper.ScrapeUpdate{
+ SystemID: system.ID, Total: 0, TotalSteps: totalSteps, CurrentStep: currentStep,
+ }:
}
titlesBySlug := make(map[string]database.MediaTitle)
@@ -567,10 +579,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
allTitles, titlesErr := mdb.GetTitlesBySystemID(system.ID)
if titlesErr != nil {
if errors.Is(titlesErr, context.Canceled) || errors.Is(titlesErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true})
return
}
for _, t := range allTitles {
@@ -586,10 +598,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
unscraped, titlesErr := mdb.FindMediaTitlesWithoutSentinel(ctx, system.DBID, sentinelTag)
if titlesErr != nil {
if errors.Is(titlesErr, context.Canceled) || errors.Is(titlesErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: titlesErr, Done: true})
return
}
for _, t := range unscraped {
@@ -598,10 +610,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
allTitles, allTitlesErr := mdb.GetTitlesBySystemID(system.ID)
if allTitlesErr != nil {
if errors.Is(allTitlesErr, context.Canceled) || errors.Is(allTitlesErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: allTitlesErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: allTitlesErr, Done: true})
return
}
for _, t := range allTitles {
@@ -614,10 +626,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
allMedia, mediaErr := mdb.GetMediaBySystemID(system.ID)
if mediaErr != nil {
if errors.Is(mediaErr, context.Canceled) || errors.Is(mediaErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: mediaErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: mediaErr, Done: true})
return
}
scrapedIDs := map[int64]struct{}{}
@@ -626,10 +638,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
scrapedIDs, scrapedErr = mdb.GetScrapedMediaIDs(ctx, id, system.DBID)
if scrapedErr != nil {
if errors.Is(scrapedErr, context.Canceled) || errors.Is(scrapedErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: scrapedErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: scrapedErr, Done: true})
return
}
}
@@ -660,26 +672,30 @@ func (g *GamelistXMLScraper) scrapeLoop(
parsed, parseErr := g.loadParsedGamelistSystem(ctx, system)
if parseErr != nil {
if errors.Is(parseErr, context.Canceled) || errors.Is(parseErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, Done: true})
return
}
- ch <- scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: parseErr, Done: true}
+ sendUpdate(scraper.ScrapeUpdate{SystemID: system.ID, FatalErr: parseErr, Done: true})
return
}
- companion := g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch)
- if !waitForResume(system.ID, companion.Processed, companion.Matched, companion.Skipped) {
+ companion := g.processCompanionEntriesFromParsed(
+ ctx, opts, system, mdb, indexes, parsed, ch, totalSteps, currentStep,
+ )
+ if !waitForResume(system.ID, currentStep, companion.Processed, companion.Matched, companion.Skipped) {
return
}
if len(indexes.TitlesBySlug) == 0 && len(indexes.MediaByPathFold) == 0 {
if companion.Processed > 0 {
ch <- scraper.ScrapeUpdate{
- SystemID: system.ID,
- Total: companion.Processed,
- Processed: companion.Processed,
- Matched: companion.Matched,
- Skipped: companion.Skipped,
+ SystemID: system.ID,
+ Total: companion.Processed,
+ Processed: companion.Processed,
+ Matched: companion.Matched,
+ Skipped: companion.Skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
}
}
totalProcessed += companion.Processed
@@ -691,16 +707,16 @@ func (g *GamelistXMLScraper) scrapeLoop(
records, loadErr := g.loadRecordsFromParsed(ctx, system, indexes, parsed)
if loadErr != nil {
if errors.Is(loadErr, context.Canceled) || errors.Is(loadErr, context.DeadlineExceeded) {
- ch <- scraper.ScrapeUpdate{
+ sendUpdate(scraper.ScrapeUpdate{
SystemID: system.ID, Done: true, Processed: companion.Processed, Matched: companion.Matched,
Skipped: companion.Skipped,
- }
+ })
return
}
- ch <- scraper.ScrapeUpdate{
+ sendUpdate(scraper.ScrapeUpdate{
SystemID: system.ID, FatalErr: loadErr,
Done: true, Processed: companion.Processed, Matched: companion.Matched, Skipped: companion.Skipped,
- }
+ })
return
}
@@ -712,19 +728,23 @@ func (g *GamelistXMLScraper) scrapeLoop(
select {
case <-ctx.Done():
ch <- scraper.ScrapeUpdate{
- Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped,
+ Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped,
+ TotalSteps: totalSteps, CurrentStep: currentStep, Done: true,
}
return
case ch <- scraper.ScrapeUpdate{
- SystemID: system.ID,
- Total: systemTotal,
- Processed: companion.Processed,
- Matched: companion.Matched,
- Skipped: companion.Skipped,
+ SystemID: system.ID,
+ Total: systemTotal,
+ Processed: companion.Processed,
+ Matched: companion.Matched,
+ Skipped: companion.Skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
}:
}
if !waitForResume(
system.ID,
+ currentStep,
companion.Processed+processed,
companion.Matched+matched,
companion.Skipped+skipped,
@@ -742,14 +762,18 @@ func (g *GamelistXMLScraper) scrapeLoop(
update.Processed += companion.Processed
update.Matched += companion.Matched
update.Skipped += companion.Skipped
+ update.TotalSteps = totalSteps
+ update.CurrentStep = currentStep
select {
case <-ctx.Done():
ch <- scraper.ScrapeUpdate{
- SystemID: system.ID,
- Processed: companion.Processed + processed,
- Matched: companion.Matched + matched,
- Skipped: companion.Skipped + skipped,
- Done: true,
+ SystemID: system.ID,
+ Processed: companion.Processed + processed,
+ Matched: companion.Matched + matched,
+ Skipped: companion.Skipped + skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
+ Done: true,
}
return false
case ch <- update:
@@ -761,6 +785,7 @@ func (g *GamelistXMLScraper) scrapeLoop(
for _, record := range records {
if !waitForResume(
system.ID,
+ currentStep,
companion.Processed+processed,
companion.Matched+matched,
companion.Skipped+skipped,
@@ -770,11 +795,13 @@ func (g *GamelistXMLScraper) scrapeLoop(
select {
case <-ctx.Done():
ch <- scraper.ScrapeUpdate{
- SystemID: system.ID,
- Processed: companion.Processed + processed,
- Matched: companion.Matched + matched,
- Skipped: companion.Skipped + skipped,
- Done: true,
+ SystemID: system.ID,
+ Processed: companion.Processed + processed,
+ Matched: companion.Matched + matched,
+ Skipped: companion.Skipped + skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
+ Done: true,
}
return
default:
@@ -809,6 +836,7 @@ func (g *GamelistXMLScraper) scrapeLoop(
}
if !waitForResume(
system.ID,
+ currentStep,
companion.Processed+processed,
companion.Matched+matched,
companion.Skipped+skipped,
@@ -860,7 +888,10 @@ func (g *GamelistXMLScraper) scrapeLoop(
totalSkipped += companion.Skipped + skipped
}
- ch <- scraper.ScrapeUpdate{Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped}
+ ch <- scraper.ScrapeUpdate{
+ Done: true, Processed: totalProcessed, Matched: totalMatched, Skipped: totalSkipped,
+ TotalSteps: totalSteps, CurrentStep: totalSteps,
+ }
}
// MapToDB converts a GamelistRecord into the tag and property writes to apply
@@ -1595,7 +1626,7 @@ func (g *GamelistXMLScraper) processCompanionEntries(
if err != nil {
return companionStats{}
}
- return g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch)
+ return g.processCompanionEntriesFromParsed(ctx, opts, system, mdb, indexes, parsed, ch, 0, 0)
}
func (g *GamelistXMLScraper) processCompanionEntriesFromParsed(
@@ -1606,6 +1637,8 @@ func (g *GamelistXMLScraper) processCompanionEntriesFromParsed(
indexes loadRecordIndexes,
parsed parsedGamelistSystem,
ch chan<- scraper.ScrapeUpdate,
+ totalSteps int,
+ currentStep int,
) companionStats {
parents, children := companionEntriesFromParsed(ctx, system, parsed)
if len(parents) == 0 && len(children) == 0 {
@@ -1642,11 +1675,13 @@ func (g *GamelistXMLScraper) processCompanionEntriesFromParsed(
case <-ctx.Done():
return false
case ch <- scraper.ScrapeUpdate{
- SystemID: system.ID,
- Total: len(children),
- Processed: stats.Processed,
- Matched: stats.Matched,
- Skipped: stats.Skipped,
+ SystemID: system.ID,
+ Total: len(children),
+ Processed: stats.Processed,
+ Matched: stats.Matched,
+ Skipped: stats.Skipped,
+ TotalSteps: totalSteps,
+ CurrentStep: currentStep,
}:
lastProgress = time.Now()
return true
diff --git a/pkg/database/scraper/gamelistxml/scraper_test.go b/pkg/database/scraper/gamelistxml/scraper_test.go
index f767a577..6c684fa2 100644
--- a/pkg/database/scraper/gamelistxml/scraper_test.go
+++ b/pkg/database/scraper/gamelistxml/scraper_test.go
@@ -2647,6 +2647,68 @@ func TestScrapeLoop_ProgressIsPerSystem(t *testing.T) {
mockDB.AssertExpectations(t)
}
+func TestScrapeLoop_PauseCancelBeforeNextSystemPreservesProgress(t *testing.T) {
+ t.Parallel()
+
+ root1 := t.TempDir()
+ root2 := t.TempDir()
+ recordPath := filepath.Join(root1, "first.nes")
+ require.NoError(t, os.WriteFile(filepath.Join(root1, "gamelist.xml"), []byte(`
+
+ ./first.nesFirst
+`), 0o600))
+ require.NoError(t, os.WriteFile(filepath.Join(root2, "gamelist.xml"), []byte(`
+
+ ./second.sfcSecond
+`), 0o600))
+
+ pauser := syncutil.NewPauser()
+ paused := make(chan struct{})
+ mockDB := helpers.NewMockMediaDBI()
+ mockDB.On("GetTitlesBySystemID", "nes").Return([]database.TitleWithSystem{{
+ DBID: 1, SystemDBID: 10, Slug: "first", Name: "First",
+ }}, nil)
+ mockDB.On("GetMediaBySystemID", "nes").Return([]database.MediaWithFullPath{{
+ DBID: 11, MediaTitleDBID: 1, Path: recordPath,
+ }}, nil)
+ mockDB.On("ApplyScrapeResult", mock.Anything, int64(11), int64(1), mock.Anything).
+ Run(func(_ mock.Arguments) {
+ pauser.Pause()
+ close(paused)
+ }).
+ Return(nil)
+
+ s := &GamelistXMLScraper{db: mockDB}
+ ch := make(chan scraper.ScrapeUpdate, 128)
+ ctx, cancel := context.WithCancel(context.Background())
+ done := make(chan struct{})
+ go func() {
+ defer close(done)
+ s.scrapeLoop(ctx, scraper.ScrapeOptions{Pauser: pauser, Force: true}, []scraper.ScrapeSystem{
+ {ID: "nes", ROMPaths: []string{root1}, DBID: 10},
+ {ID: "snes", ROMPaths: []string{root2}, DBID: 20},
+ }, mockDB, ch)
+ }()
+
+ <-paused
+ cancel()
+ <-done
+
+ updates := drainChannel(ch)
+ var doneUpdate scraper.ScrapeUpdate
+ for _, update := range updates {
+ if update.Done {
+ doneUpdate = update
+ }
+ }
+ require.True(t, doneUpdate.Done)
+ assert.Equal(t, "snes", doneUpdate.SystemID)
+ assert.Equal(t, 1, doneUpdate.Processed)
+ assert.Equal(t, 1, doneUpdate.Matched)
+ assert.Equal(t, 0, doneUpdate.Skipped)
+ mockDB.AssertExpectations(t)
+}
+
func TestScrapeLoop_ProgressIncludesCompanionBaseline(t *testing.T) {
t.Parallel()
diff --git a/pkg/database/scraper/scraper.go b/pkg/database/scraper/scraper.go
index ad2b6ad6..44199896 100644
--- a/pkg/database/scraper/scraper.go
+++ b/pkg/database/scraper/scraper.go
@@ -54,14 +54,16 @@ type ScrapeOptions struct {
// ScrapeUpdate is one progress event emitted on the channel returned by Scrape.
type ScrapeUpdate struct {
- Err error
- FatalErr error
- SystemID string
- Processed int
- Total int
- Matched int
- Skipped int
- Done bool
+ Err error
+ FatalErr error
+ SystemID string
+ Processed int
+ Total int
+ Matched int
+ Skipped int
+ TotalSteps int
+ CurrentStep int
+ Done bool
}
// MatchResult is the output of a successful Match call. Both IDs must be