Skip to content

Commit 04ebf99

Browse files
committed
feat(services): Add ScoringFeedbackService for relevance scoring feedback loop
- Implemented `ScoringFeedbackService` to learn from curator decisions and adjust relevance scoring weights.
- Added `computeLearnedWeights` and `analyzeFeedback` methods to derive and analyze weight adjustments based on accepted and rejected candidates.
- Updated `RelevanceScoringService` to support dynamic weight application and defaults fallback.
- Enhanced `PublicationDiscoveryService` to integrate feedback-based weight updates before candidate scoring.
- Extended repositories and added unit tests to validate feedback, weight adjustments, and integration behavior.
1 parent 16c4b08 commit 04ebf99

File tree

6 files changed

+528
-58
lines changed

6 files changed

+528
-58
lines changed

app/repositories/PublicationCandidateRepository.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ trait PublicationCandidateRepository {
2121
def bulkReject(ids: Seq[Int], reason: String, reviewedBy: UUID): Future[Int]
2222
def saveCandidates(candidates: Seq[PublicationCandidate]): Future[Seq[PublicationCandidate]]
2323
def countByStatus(): Future[Map[String, Int]]
24+
def listReviewed(): Future[Seq[PublicationCandidate]]
2425
}
2526

2627
@Singleton
@@ -131,4 +132,8 @@ class PublicationCandidateRepositoryImpl @Inject()(
131132
(status, group.length)
132133
}.result).map(_.toMap)
133134
}
135+
136+
/**
 * Returns every candidate a curator has already decided on — i.e. rows whose
 * status is "accepted" or "rejected". Used by the scoring feedback loop to
 * learn weight adjustments from historical decisions.
 */
override def listReviewed(): Future[Seq[PublicationCandidate]] = {
  db.run(candidatesTable.filter(c => c.status === "accepted" || c.status === "rejected").result)
}
134139
}

app/services/PublicationDiscoveryService.scala

Lines changed: 58 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ class PublicationDiscoveryService @Inject()(
1616
publicationRepository: PublicationRepository,
1717
publicationService: PublicationService,
1818
openAlexService: OpenAlexService,
19-
relevanceScoringService: RelevanceScoringService
19+
relevanceScoringService: RelevanceScoringService,
20+
scoringFeedbackService: ScoringFeedbackService
2021
)(implicit ec: ExecutionContext) extends Logging {
2122

2223
def acceptCandidate(candidateId: Int, reviewedBy: java.util.UUID): Future[Option[models.domain.publications.Publication]] = {
@@ -62,62 +63,68 @@ class PublicationDiscoveryService @Inject()(
6263
candidateRepository.bulkUpdateStatus(candidateIds, "deferred", reviewedBy, None)
6364
}
6465

66+
/**
 * Recomputes scoring weights from curator feedback and pushes the result into
 * the scoring service. When there is not enough feedback data, any previously
 * learned weights are cleared so scoring falls back to the configured defaults.
 *
 * @return the weights now in effect, or None when defaults are active
 */
def refreshLearnedWeights(): Future[Option[LearnedWeights]] = {
  scoringFeedbackService.computeLearnedWeights().map { maybeWeights =>
    maybeWeights.fold[Option[LearnedWeights]] {
      // No usable feedback — revert the scorer to its default weights.
      relevanceScoringService.clearLearnedWeights()
      None
    } { weights =>
      relevanceScoringService.applyLearnedWeights(weights)
      Some(weights)
    }
  }
}
76+
6577
/**
 * Runs one full discovery pass: refreshes feedback-learned scoring weights,
 * then for every enabled search configuration searches OpenAlex, deduplicates
 * results against already-ingested publications (by DOI), scores the new
 * candidates, persists them, and records an audit row for the run.
 *
 * A failure in one configuration is logged and does not abort the others.
 */
def runDiscovery(): Future[Unit] = {
  logger.info("Starting publication discovery run...")

  // Refresh learned weights from curator feedback before scoring new candidates.
  refreshLearnedWeights().flatMap { learnedWeights =>
    learnedWeights.foreach(w => logger.info(s"Using learned weights from ${w.sampleSize} reviewed candidates."))

    searchConfigRepository.getEnabledConfigs().flatMap { configs =>
      logger.info(s"Found ${configs.size} enabled search configurations.")

      // The DOI set is loop-invariant: fetch it once for the whole run instead
      // of issuing one repository query per configuration.
      val existingDoisFuture = publicationRepository.getAllDois.map(_.toSet)

      val runs = configs.map { config =>
        val startTime = System.currentTimeMillis()

        openAlexService.searchWorks(config.searchQuery).flatMap { rawCandidates =>
          existingDoisFuture.flatMap { existingDois =>
            // Deduplicate against existing publications by DOI.
            // NOTE(review): candidates without a DOI pass this filter; dedup for
            // those relies on the candidate repository's own checks — confirm.
            val newCandidates = rawCandidates.filterNot { c =>
              c.doi.exists(existingDois.contains)
            }

            // Multi-signal relevance scoring (uses learned weights when set).
            val scoredCandidates = relevanceScoringService.scoreCandidates(newCandidates)

            candidateRepository.saveCandidates(scoredCandidates).flatMap { savedCandidates =>
              val duration = (System.currentTimeMillis() - startTime).toInt

              // Audit record for this configuration's run.
              // config.id.get: enabled configs come from the database, so the id
              // is expected to be present; if it ever is not, this config's
              // future fails and is logged by the recover below.
              val run = PublicationSearchRun(
                id = None,
                configId = config.id.get,
                runAt = LocalDateTime.now(),
                candidatesFound = rawCandidates.size,
                newCandidates = savedCandidates.size,
                queryUsed = Some(config.searchQuery),
                durationMs = Some(duration)
              )

              for {
                _ <- runRepository.create(run)
                _ <- searchConfigRepository.updateLastRun(config.id.get, LocalDateTime.now())
              } yield ()
            }
          }
        }.recover {
          case e: Exception =>
            // One failing config must not abort the rest of the run.
            logger.error(s"Error running discovery for config '${config.name}' (ID: ${config.id}): ${e.getMessage}", e)
        }
      }

      Future.sequence(runs).map(_ => logger.info("Publication discovery run completed."))
    }
  }
}
123130
}

app/services/RelevanceScoringService.scala

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import models.domain.publications.PublicationCandidate
55
import play.api.libs.json.{JsArray, JsValue}
66
import play.api.{Configuration, Logging}
77

8+
import java.util.concurrent.atomic.AtomicReference
89
import scala.concurrent.ExecutionContext
910

1011
/**
@@ -21,11 +22,38 @@ class RelevanceScoringService @Inject()(
2122
configuration: Configuration
2223
)(implicit ec: ExecutionContext) extends Logging {
2324

24-
// Weights for each scoring component (should sum to 1.0)
25-
private val keywordWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.keywordWeight").getOrElse(0.35)
26-
private val conceptWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.conceptWeight").getOrElse(0.25)
27-
private val citationWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.citationWeight").getOrElse(0.20)
28-
private val journalWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.journalWeight").getOrElse(0.20)
25+
// Default weights for each scoring component (should sum to 1.0).
// Read once from configuration at construction, with hard-coded fallbacks.
private val defaultKeywordWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.keywordWeight").getOrElse(0.35)
private val defaultConceptWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.conceptWeight").getOrElse(0.25)
private val defaultCitationWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.citationWeight").getOrElse(0.20)
private val defaultJournalWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.journalWeight").getOrElse(0.20)

// Learned weights from the feedback loop (override the defaults when set).
// Held in an AtomicReference because apply/clear may run on a different
// thread than the scoring path.
private val learnedWeightsRef: AtomicReference[Option[LearnedWeights]] = new AtomicReference(None)
33+
34+
/**
 * Installs feedback-derived weights; they take effect for all subsequent
 * scoring until [[clearLearnedWeights]] is called.
 */
def applyLearnedWeights(weights: LearnedWeights): Unit = {
  learnedWeightsRef.set(Some(weights))
  val detail = f"keyword=${weights.keywordWeight}%.3f, concept=${weights.conceptWeight}%.3f, " +
    f"citation=${weights.citationWeight}%.3f, journal=${weights.journalWeight}%.3f"
  logger.info(s"Applied learned weights from ${weights.sampleSize} samples: $detail")
}
40+
41+
/** Discards any learned weights so scoring reverts to the configured defaults. */
def clearLearnedWeights(): Unit = {
  learnedWeightsRef.set(None)
  logger.info("Cleared learned weights, reverting to defaults.")
}
45+
46+
/**
 * The weights currently in effect, as (keyword, concept, citation, journal):
 * learned weights when the feedback loop has set them, configured defaults
 * otherwise.
 */
def getActiveWeights: (Double, Double, Double, Double) =
  learnedWeightsRef.get()
    .map(lw => (lw.keywordWeight, lw.conceptWeight, lw.citationWeight, lw.journalWeight))
    .getOrElse((defaultKeywordWeight, defaultConceptWeight, defaultCitationWeight, defaultJournalWeight))
52+
53+
// Per-component weight accessors used by the scoring code.
// NOTE(review): each accessor re-reads the atomic reference independently, so
// a scoring pass that reads several weights can observe a mix of old and new
// values if applyLearnedWeights runs concurrently. If scoring can overlap a
// refresh, consider snapshotting getActiveWeights once per candidate — confirm.
private def keywordWeight: Double = getActiveWeights._1
private def conceptWeight: Double = getActiveWeights._2
private def citationWeight: Double = getActiveWeights._3
private def journalWeight: Double = getActiveWeights._4
2957

3058
// High-value keywords for genomics/phylogenetics domain
3159
private[services] val primaryKeywords: Set[String] = Set(
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
package services
2+
3+
import jakarta.inject.{Inject, Singleton}
4+
import models.domain.publications.PublicationCandidate
5+
import play.api.Logging
6+
import repositories.PublicationCandidateRepository
7+
8+
import scala.concurrent.{ExecutionContext, Future}
9+
10+
/**
 * Learns from curator accept/reject decisions to improve relevance scoring.
 *
 * Analyzes historical decisions by computing per-component score distributions
 * for accepted vs rejected candidates, then derives adjusted weights that
 * emphasize components with higher discriminative power.
 */
@Singleton
class ScoringFeedbackService @Inject()(
  candidateRepository: PublicationCandidateRepository,
  relevanceScoringService: RelevanceScoringService
)(implicit ec: ExecutionContext) extends Logging {

  // Minimum number of reviewed candidates before feedback learning kicks in.
  val MinSamplesForFeedback: Int = 10

  /**
   * Analyze all reviewed candidates and compute learned weight adjustments.
   * Returns None if there is insufficient data: fewer than
   * MinSamplesForFeedback reviewed candidates, or no candidates on one side
   * of the accept/reject split (both populations are needed to measure
   * discrimination).
   */
  def computeLearnedWeights(): Future[Option[LearnedWeights]] = {
    candidateRepository.listReviewed().map { reviewed =>
      if (reviewed.size < MinSamplesForFeedback) {
        logger.info(s"Insufficient reviewed candidates (${reviewed.size}/$MinSamplesForFeedback) for feedback learning.")
        None
      } else {
        val accepted = reviewed.filter(_.status == "accepted")
        val rejected = reviewed.filter(_.status == "rejected")

        if (accepted.isEmpty || rejected.isEmpty) {
          logger.info("Need both accepted and rejected candidates for feedback learning.")
          None
        } else {
          Some(deriveWeights(accepted, rejected))
        }
      }
    }
  }

  /**
   * Compute a feedback analysis report with per-component statistics.
   * Returns None when nothing has been reviewed yet.
   */
  def analyzeFeedback(): Future[Option[FeedbackAnalysis]] = {
    candidateRepository.listReviewed().map { reviewed =>
      val accepted = reviewed.filter(_.status == "accepted")
      val rejected = reviewed.filter(_.status == "rejected")

      if (accepted.isEmpty && rejected.isEmpty) None
      else {
        val acceptedBreakdowns = accepted.map(relevanceScoringService.scoreBreakdown)
        val rejectedBreakdowns = rejected.map(relevanceScoringService.scoreBreakdown)

        Some(FeedbackAnalysis(
          totalReviewed = reviewed.size,
          acceptedCount = accepted.size,
          rejectedCount = rejected.size,
          acceptedMeans = computeMeans(acceptedBreakdowns),
          rejectedMeans = computeMeans(rejectedBreakdowns),
          componentDiscriminativePower = computeDiscriminativePower(acceptedBreakdowns, rejectedBreakdowns)
        ))
      }
    }
  }

  /**
   * Derive blended, normalized weights from the two reviewed populations.
   * Precondition: both `accepted` and `rejected` are non-empty (enforced by
   * computeLearnedWeights).
   */
  private[services] def deriveWeights(
    accepted: Seq[PublicationCandidate],
    rejected: Seq[PublicationCandidate]
  ): LearnedWeights = {
    val acceptedBreakdowns = accepted.map(relevanceScoringService.scoreBreakdown)
    val rejectedBreakdowns = rejected.map(relevanceScoringService.scoreBreakdown)

    val discriminativePower = computeDiscriminativePower(acceptedBreakdowns, rejectedBreakdowns)

    // Compute new weights proportional to discriminative power,
    // blended with the current weights for stability (70% current, 30% learned).
    // Reuse an already-computed breakdown for the baseline weights instead of
    // re-scoring the same candidate once per component, as the previous version
    // did with four separate scoreBreakdown(accepted.head) calls.
    // NOTE(review): the breakdown reports the *active* weights, so once learned
    // weights are applied, later refreshes blend against the learned values
    // rather than the configured defaults — weights can drift toward pure
    // discriminative power over repeated cycles. Confirm this is intended.
    val blendRatio = 0.3
    val baseline = acceptedBreakdowns.head
    val originalWeights = Map(
      "keyword" -> baseline.keywordWeight,
      "concept" -> baseline.conceptWeight,
      "citation" -> baseline.citationWeight,
      "journal" -> baseline.journalWeight
    )

    // Normalize discriminative power to sum to 1.0 for use as weights;
    // if no component discriminates at all, fall back to the current weights.
    val totalPower = discriminativePower.values.sum
    val learnedRaw = if (totalPower > 0) {
      discriminativePower.view.mapValues(_ / totalPower).toMap
    } else {
      originalWeights
    }

    // Blend: new_weight = (1 - blend) * original + blend * learned
    val blended = originalWeights.map { case (component, origWeight) =>
      val learnedWeight = learnedRaw.getOrElse(component, origWeight)
      component -> ((1.0 - blendRatio) * origWeight + blendRatio * learnedWeight)
    }

    // Re-normalize so the blended weights sum to 1.0.
    val blendedTotal = blended.values.sum
    val normalized = blended.view.mapValues(_ / blendedTotal).toMap

    logger.info(s"Learned weights from ${accepted.size + rejected.size} reviewed candidates: $normalized")

    LearnedWeights(
      keywordWeight = normalized("keyword"),
      conceptWeight = normalized("concept"),
      citationWeight = normalized("citation"),
      journalWeight = normalized("journal"),
      sampleSize = accepted.size + rejected.size,
      discriminativePower = discriminativePower
    )
  }

  /**
   * Discriminative power = |mean_accepted - mean_rejected| for each component.
   * Higher values mean the component better separates accepted from rejected.
   */
  private[services] def computeDiscriminativePower(
    acceptedBreakdowns: Seq[ScoringBreakdown],
    rejectedBreakdowns: Seq[ScoringBreakdown]
  ): Map[String, Double] = {
    val acceptedMeans = computeMeans(acceptedBreakdowns)
    val rejectedMeans = computeMeans(rejectedBreakdowns)

    Map(
      "keyword" -> math.abs(acceptedMeans.getOrElse("keyword", 0.0) - rejectedMeans.getOrElse("keyword", 0.0)),
      "concept" -> math.abs(acceptedMeans.getOrElse("concept", 0.0) - rejectedMeans.getOrElse("concept", 0.0)),
      "citation" -> math.abs(acceptedMeans.getOrElse("citation", 0.0) - rejectedMeans.getOrElse("citation", 0.0)),
      "journal" -> math.abs(acceptedMeans.getOrElse("journal", 0.0) - rejectedMeans.getOrElse("journal", 0.0))
    )
  }

  /** Per-component mean scores; all zeros for an empty input. */
  private[services] def computeMeans(breakdowns: Seq[ScoringBreakdown]): Map[String, Double] = {
    if (breakdowns.isEmpty) return Map("keyword" -> 0.0, "concept" -> 0.0, "citation" -> 0.0, "journal" -> 0.0)

    val n = breakdowns.size.toDouble
    Map(
      "keyword" -> breakdowns.map(_.keywordScore).sum / n,
      "concept" -> breakdowns.map(_.conceptScore).sum / n,
      "citation" -> breakdowns.map(_.citationScore).sum / n,
      "journal" -> breakdowns.map(_.journalScore).sum / n
    )
  }
}
153+
154+
/**
 * Scoring weights derived from curator feedback, normalized to sum to 1.0.
 *
 * @param keywordWeight       blended weight for the keyword component
 * @param conceptWeight       blended weight for the concept component
 * @param citationWeight      blended weight for the citation component
 * @param journalWeight       blended weight for the journal component
 * @param sampleSize          number of reviewed candidates used for derivation
 * @param discriminativePower per-component |mean(accepted) - mean(rejected)| gap
 */
case class LearnedWeights(
  keywordWeight: Double,
  conceptWeight: Double,
  citationWeight: Double,
  journalWeight: Double,
  sampleSize: Int,
  discriminativePower: Map[String, Double]
)
162+
163+
/**
 * Summary statistics of curator feedback for reporting/inspection.
 *
 * @param totalReviewed                total candidates with a curator decision
 * @param acceptedCount                how many were accepted
 * @param rejectedCount                how many were rejected
 * @param acceptedMeans                per-component mean scores of accepted candidates
 * @param rejectedMeans                per-component mean scores of rejected candidates
 * @param componentDiscriminativePower per-component |mean(accepted) - mean(rejected)| gap
 */
case class FeedbackAnalysis(
  totalReviewed: Int,
  acceptedCount: Int,
  rejectedCount: Int,
  acceptedMeans: Map[String, Double],
  rejectedMeans: Map[String, Double],
  componentDiscriminativePower: Map[String, Double]
)

test/services/PublicationDiscoveryServiceSpec.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,18 @@ class PublicationDiscoveryServiceSpec extends ServiceSpec {
1919
val mockPubService: PublicationService = mock[PublicationService]
2020
val mockOpenAlexService: OpenAlexService = mock[OpenAlexService]
2121
val mockRelevanceScoringService: RelevanceScoringService = mock[RelevanceScoringService]
22+
val mockScoringFeedbackService: ScoringFeedbackService = mock[ScoringFeedbackService]
2223

2324
val service = new PublicationDiscoveryService(
2425
mockSearchConfigRepo, mockCandidateRepo, mockRunRepo,
25-
mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService
26+
mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService,
27+
mockScoringFeedbackService
2628
)
2729

2830
override def beforeEach(): Unit = {
2931
reset(mockSearchConfigRepo, mockCandidateRepo, mockRunRepo,
30-
mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService)
32+
mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService,
33+
mockScoringFeedbackService)
3134
}
3235

3336
val reviewerId: UUID = UUID.randomUUID()

0 commit comments

Comments
 (0)