Skip to content

Commit f1ee3e5

Browse files
Aayush MainiCopilot
andcommitted
Reconcile bare/rich component identities in ComponentsFound
Within a single detector, components registered under bare Ids (no DownloadUrl/SourceUrl) are now merged into their rich counterparts sharing the same BaseId. Changes: - ComponentRecorder.GetDetectedComponents(): group by BaseId, merge bare metadata (licenses, suppliers, containers) into all rich entries - DefaultGraphTranslationService.GatherSetOfDetectedComponentsUnmerged(): extend graph lookup to match on BaseId so rich components absorb graph data (roots, ancestors, devDep, scope, file paths) from bare-Id graphs - Tests for both reconciliation levels Addresses work item #2372676. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent af0cff9 commit f1ee3e5

4 files changed

Lines changed: 391 additions & 42 deletions

File tree

src/Microsoft.ComponentDetection.Common/DependencyGraph/ComponentRecorder.cs

Lines changed: 119 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,56 +34,142 @@ public TypedComponent GetComponent(string componentId)
3434

3535
public IEnumerable<DetectedComponent> GetDetectedComponents()
3636
{
37-
IEnumerable<DetectedComponent> detectedComponents;
3837
if (this.singleFileRecorders == null)
3938
{
4039
return [];
4140
}
4241

43-
detectedComponents = this.singleFileRecorders.Values
44-
.SelectMany(singleFileRecorder => singleFileRecorder.GetDetectedComponents().Values)
45-
.GroupBy(x => x.Component.Id)
46-
.Select(grouping =>
47-
{
48-
// We pick a winner here -- any stateful props could get lost at this point.
49-
var winningDetectedComponent = grouping.First();
42+
var allComponents = this.singleFileRecorders.Values
43+
.SelectMany(singleFileRecorder => singleFileRecorder.GetDetectedComponents().Values);
5044

51-
HashSet<string> mergedLicenses = null;
52-
HashSet<ActorInfo> mergedSuppliers = null;
45+
// When both rich and bare entries exist for the same BaseId, rich entries are used as merge targets for bare entries.
46+
var reconciledComponents = new List<DetectedComponent>();
5347

54-
foreach (var component in grouping.Skip(1))
55-
{
56-
winningDetectedComponent.ContainerDetailIds.UnionWith(component.ContainerDetailIds);
48+
foreach (var baseIdGroup in allComponents.GroupBy(x => x.Component.BaseId))
49+
{
50+
var richEntries = new List<DetectedComponent>();
51+
var bareEntries = new List<DetectedComponent>();
5752

58-
// Defensive: merge in case different file recorders set different values for the same component.
59-
if (component.LicensesConcluded != null)
60-
{
61-
mergedLicenses ??= new HashSet<string>(winningDetectedComponent.LicensesConcluded ?? [], StringComparer.OrdinalIgnoreCase);
62-
mergedLicenses.UnionWith(component.LicensesConcluded);
63-
}
53+
// Sub-group by full Id first: merge duplicates of the same Id (existing behavior).
54+
foreach (var idGroup in baseIdGroup.GroupBy(x => x.Component.Id))
55+
{
56+
var merged = MergeDetectedComponentGroup(idGroup);
6457

65-
if (component.Suppliers != null)
66-
{
67-
mergedSuppliers ??= new HashSet<ActorInfo>(winningDetectedComponent.Suppliers ?? []);
68-
mergedSuppliers.UnionWith(component.Suppliers);
69-
}
58+
if (merged.Component.Id == merged.Component.BaseId)
59+
{
60+
bareEntries.Add(merged);
7061
}
71-
72-
if (mergedLicenses != null)
62+
else
7363
{
74-
winningDetectedComponent.LicensesConcluded = mergedLicenses.Where(x => x != null).OrderBy(x => x, StringComparer.OrdinalIgnoreCase).ToList();
64+
richEntries.Add(merged);
7565
}
66+
}
7667

77-
if (mergedSuppliers != null)
68+
if (richEntries.Count > 0 && bareEntries.Count > 0)
69+
{
70+
// Merge each bare entry's metadata into every rich entry, then drop the bare.
71+
foreach (var bare in bareEntries)
7872
{
79-
winningDetectedComponent.Suppliers = mergedSuppliers.Where(s => s != null).OrderBy(s => s.Name).ThenBy(s => s.Type).ToList();
73+
foreach (var rich in richEntries)
74+
{
75+
MergeComponentMetadata(source: bare, target: rich);
76+
}
8077
}
8178

82-
return winningDetectedComponent;
83-
})
84-
.ToArray();
79+
reconciledComponents.AddRange(richEntries);
80+
}
81+
else
82+
{
83+
// No conflict: either all rich (different Ids kept separate) or all bare.
84+
reconciledComponents.AddRange(richEntries);
85+
reconciledComponents.AddRange(bareEntries);
86+
}
87+
}
88+
89+
return reconciledComponents.ToArray();
90+
}
91+
92+
/// <summary>
93+
/// Merges component-level metadata from <paramref name="source"/> into <paramref name="target"/>.
94+
/// </summary>
95+
private static void MergeComponentMetadata(DetectedComponent source, DetectedComponent target)
96+
{
97+
target.ContainerDetailIds.UnionWith(source.ContainerDetailIds);
98+
99+
foreach (var kvp in source.ContainerLayerIds)
100+
{
101+
if (target.ContainerLayerIds.TryGetValue(kvp.Key, out var existingLayers))
102+
{
103+
target.ContainerLayerIds[kvp.Key] = existingLayers.Union(kvp.Value).Distinct().ToList();
104+
}
105+
else
106+
{
107+
target.ContainerLayerIds[kvp.Key] = kvp.Value.ToList();
108+
}
109+
}
110+
111+
target.LicensesConcluded = MergeAndNormalizeLicenses(target.LicensesConcluded, source.LicensesConcluded);
112+
target.Suppliers = MergeAndNormalizeSuppliers(target.Suppliers, source.Suppliers);
113+
}
114+
115+
private static IList<string> MergeAndNormalizeLicenses(IList<string> target, IList<string> source)
116+
{
117+
if (target == null && source == null)
118+
{
119+
return null;
120+
}
121+
122+
var merged = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
123+
124+
if (target != null)
125+
{
126+
merged.UnionWith(target.Where(x => x != null));
127+
}
128+
129+
if (source != null)
130+
{
131+
merged.UnionWith(source.Where(x => x != null));
132+
}
133+
134+
return merged.OrderBy(x => x, StringComparer.OrdinalIgnoreCase).ToList();
135+
}
136+
137+
private static IList<ActorInfo> MergeAndNormalizeSuppliers(IList<ActorInfo> target, IList<ActorInfo> source)
138+
{
139+
if (target == null && source == null)
140+
{
141+
return null;
142+
}
143+
144+
var merged = new HashSet<ActorInfo>();
145+
146+
if (target != null)
147+
{
148+
merged.UnionWith(target.Where(s => s != null));
149+
}
150+
151+
if (source != null)
152+
{
153+
merged.UnionWith(source.Where(s => s != null));
154+
}
155+
156+
return merged.OrderBy(s => s.Name).ThenBy(s => s.Type).ToList();
157+
}
158+
159+
/// <summary>
160+
/// Merges a group of <see cref="DetectedComponent"/>s that share the same <see cref="TypedComponent.Id"/>
161+
/// into a single entry.
162+
/// </summary>
163+
private static DetectedComponent MergeDetectedComponentGroup(IEnumerable<DetectedComponent> grouping)
164+
{
165+
var winner = grouping.First();
166+
167+
foreach (var component in grouping.Skip(1))
168+
{
169+
MergeComponentMetadata(source: component, target: winner);
170+
}
85171

86-
return detectedComponents;
172+
return winner;
87173
}
88174

89175
public IEnumerable<string> GetSkippedComponents()

src/Microsoft.ComponentDetection.Orchestrator/Services/GraphTranslation/DefaultGraphTranslationService.cs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,25 +126,31 @@ private IEnumerable<DetectedComponent> GatherSetOfDetectedComponentsUnmerged(IEn
126126
}
127127

128128
// Information about each component is relative to all of the graphs it is present in, so we take all graphs containing a given component and apply the graph data.
129-
foreach (var graphKvp in dependencyGraphsByLocation.Where(x => x.Value.Contains(component.Component.Id)))
129+
// Also match on BaseId to pick up graphs that registered this component under a bare Id (before reconciliation promoted it to a rich Id).
130+
foreach (var graphKvp in dependencyGraphsByLocation.Where(x => x.Value.Contains(component.Component.Id) || (component.Component.Id != component.Component.BaseId && x.Value.Contains(component.Component.BaseId))))
130131
{
131132
var location = graphKvp.Key;
132133
var dependencyGraph = graphKvp.Value;
133134

135+
// Determine the Id stored in this graph — may be the rich Id or the bare BaseId.
136+
var graphComponentId = dependencyGraph.Contains(component.Component.Id)
137+
? component.Component.Id
138+
: component.Component.BaseId;
139+
134140
// Calculate roots of the component
135141
var rootStartTime = DateTime.UtcNow;
136-
this.AddRootsToDetectedComponent(component, dependencyGraph, componentRecorder);
142+
this.AddRootsToDetectedComponent(component, graphComponentId, dependencyGraph, componentRecorder);
137143
var rootEndTime = DateTime.UtcNow;
138144
totalTimeToAddRoots += rootEndTime - rootStartTime;
139145

140146
// Calculate Ancestors of the component
141147
var ancestorStartTime = DateTime.UtcNow;
142-
this.AddAncestorsToDetectedComponent(component, dependencyGraph, componentRecorder);
148+
this.AddAncestorsToDetectedComponent(component, graphComponentId, dependencyGraph, componentRecorder);
143149
var ancestorEndTime = DateTime.UtcNow;
144150
totalTimeToAddAncestors += ancestorEndTime - ancestorStartTime;
145151

146-
component.DevelopmentDependency = this.MergeDevDependency(component.DevelopmentDependency, dependencyGraph.IsDevelopmentDependency(component.Component.Id));
147-
component.DependencyScope = DependencyScopeComparer.GetMergedDependencyScope(component.DependencyScope, dependencyGraph.GetDependencyScope(component.Component.Id));
152+
component.DevelopmentDependency = this.MergeDevDependency(component.DevelopmentDependency, dependencyGraph.IsDevelopmentDependency(graphComponentId));
153+
component.DependencyScope = DependencyScopeComparer.GetMergedDependencyScope(component.DependencyScope, dependencyGraph.GetDependencyScope(graphComponentId));
148154
component.DetectedBy = detector;
149155

150156
// Experiments uses this service to build the dependency graph for analysis. In this case, we do not want to update the locations of the component.
@@ -269,26 +275,26 @@ private DetectedComponent MergeComponents(IEnumerable<DetectedComponent> enumera
269275
return firstComponent;
270276
}
271277

272-
private void AddRootsToDetectedComponent(DetectedComponent detectedComponent, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
278+
private void AddRootsToDetectedComponent(DetectedComponent detectedComponent, string graphComponentId, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
273279
{
274280
detectedComponent.DependencyRoots ??= new HashSet<TypedComponent>(new ComponentComparer());
275281
if (dependencyGraph == null)
276282
{
277283
return;
278284
}
279285

280-
detectedComponent.DependencyRoots.UnionWith(dependencyGraph.GetRootsAsTypedComponents(detectedComponent.Component.Id, componentRecorder.GetComponent));
286+
detectedComponent.DependencyRoots.UnionWith(dependencyGraph.GetRootsAsTypedComponents(graphComponentId, componentRecorder.GetComponent));
281287
}
282288

283-
private void AddAncestorsToDetectedComponent(DetectedComponent detectedComponent, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
289+
private void AddAncestorsToDetectedComponent(DetectedComponent detectedComponent, string graphComponentId, IDependencyGraph dependencyGraph, IComponentRecorder componentRecorder)
284290
{
285291
detectedComponent.AncestralDependencyRoots ??= new HashSet<TypedComponent>(new ComponentComparer());
286292
if (dependencyGraph == null)
287293
{
288294
return;
289295
}
290296

291-
detectedComponent.AncestralDependencyRoots.UnionWith(dependencyGraph.GetAncestorsAsTypedComponents(detectedComponent.Component.Id, componentRecorder.GetComponent));
297+
detectedComponent.AncestralDependencyRoots.UnionWith(dependencyGraph.GetAncestorsAsTypedComponents(graphComponentId, componentRecorder.GetComponent));
292298
}
293299

294300
private HashSet<string> MakeFilePathsRelative(ILogger logger, DirectoryInfo rootDirectory, HashSet<string> filePaths)

test/Microsoft.ComponentDetection.Common.Tests/ComponentRecorderTests.cs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,4 +428,136 @@ public void GetAllDependencyGraphs_ReturnedGraphsAreImmutable()
428428
Action attemptedAdd = () => asCollection.Add("should't work");
429429
attemptedAdd.Should().Throw<NotSupportedException>();
430430
}
431+
432+
[TestMethod]
433+
public void GetDetectedComponents_BareAndRichAcrossFiles_BareSubsumedIntoRich()
434+
{
435+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
436+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("package-lock.json");
437+
438+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"));
439+
var richComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz") });
440+
441+
recorder1.RegisterUsage(bareComponent);
442+
recorder2.RegisterUsage(richComponent);
443+
444+
var results = this.componentRecorder.GetDetectedComponents().ToList();
445+
446+
// Only the rich entry should remain
447+
results.Should().ContainSingle();
448+
results[0].Component.Id.Should().Be(richComponent.Component.Id);
449+
results[0].Component.Id.Should().NotBe(bareComponent.Component.Id);
450+
}
451+
452+
[TestMethod]
453+
public void GetDetectedComponents_BareAndMultipleRichAcrossFiles_BareMergesIntoAllRich()
454+
{
455+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
456+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-a.json");
457+
var recorder3 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-b.json");
458+
459+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
460+
{
461+
LicensesConcluded = ["MIT"],
462+
};
463+
464+
var richA = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-a.example.com/lodash-4.17.23.tgz") });
465+
var richB = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-b.example.com/lodash-4.17.23.tgz") });
466+
467+
recorder1.RegisterUsage(bareComponent);
468+
recorder2.RegisterUsage(richA);
469+
recorder3.RegisterUsage(richB);
470+
471+
var results = this.componentRecorder.GetDetectedComponents().ToList();
472+
473+
// Two rich entries, bare dropped
474+
results.Should().HaveCount(2);
475+
results.Should().NotContain(c => c.Component.Id == bareComponent.Component.Id);
476+
477+
// Bare's license merged into both rich entries
478+
results.Should().OnlyContain(c => c.LicensesConcluded != null && c.LicensesConcluded.Contains("MIT"));
479+
}
480+
481+
[TestMethod]
482+
public void GetDetectedComponents_TwoRichDifferentUrls_BothKeptSeparate()
483+
{
484+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-a.json");
485+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("lockfile-b.json");
486+
487+
var richA = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-a.example.com/lodash-4.17.23.tgz") });
488+
var richB = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry-b.example.com/lodash-4.17.23.tgz") });
489+
490+
recorder1.RegisterUsage(richA);
491+
recorder2.RegisterUsage(richB);
492+
493+
var results = this.componentRecorder.GetDetectedComponents().ToList();
494+
495+
results.Should().HaveCount(2);
496+
results.Should().Contain(c => c.Component.Id == richA.Component.Id);
497+
results.Should().Contain(c => c.Component.Id == richB.Component.Id);
498+
}
499+
500+
[TestMethod]
501+
public void GetDetectedComponents_BareOnlyAcrossFiles_MergesIntoSingleBare()
502+
{
503+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
504+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("other-package.json");
505+
506+
var bare1 = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
507+
{
508+
LicensesConcluded = ["MIT"],
509+
};
510+
511+
var bare2 = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
512+
{
513+
LicensesConcluded = ["Apache-2.0"],
514+
};
515+
516+
recorder1.RegisterUsage(bare1);
517+
recorder2.RegisterUsage(bare2);
518+
519+
var results = this.componentRecorder.GetDetectedComponents().ToList();
520+
521+
// Same Id → merged into one
522+
results.Should().ContainSingle();
523+
results[0].LicensesConcluded.Should().Contain("MIT");
524+
results[0].LicensesConcluded.Should().Contain("Apache-2.0");
525+
}
526+
527+
[TestMethod]
528+
public void GetDetectedComponents_BareAndRich_MetadataMergedCorrectly()
529+
{
530+
var recorder1 = this.componentRecorder.CreateSingleFileComponentRecorder("package.json");
531+
var recorder2 = this.componentRecorder.CreateSingleFileComponentRecorder("package-lock.json");
532+
533+
var bareComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23"))
534+
{
535+
LicensesConcluded = ["MIT"],
536+
Suppliers = [new ActorInfo { Name = "Lodash Team", Type = "Organization" }],
537+
};
538+
539+
var richComponent = new DetectedComponent(new NpmComponent("lodash", "4.17.23") { DownloadUrl = new Uri("https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz") })
540+
{
541+
LicensesConcluded = ["Apache-2.0"],
542+
Suppliers = [new ActorInfo { Name = "Contoso", Type = "Organization" }],
543+
};
544+
545+
recorder1.RegisterUsage(bareComponent);
546+
recorder2.RegisterUsage(richComponent);
547+
548+
var results = this.componentRecorder.GetDetectedComponents().ToList();
549+
550+
results.Should().ContainSingle();
551+
var result = results[0];
552+
553+
// Licenses from both bare and rich should be merged
554+
result.LicensesConcluded.Should().HaveCount(2);
555+
result.LicensesConcluded.Should().Contain("Apache-2.0");
556+
result.LicensesConcluded.Should().Contain("MIT");
557+
558+
// Suppliers from both should be merged
559+
result.Suppliers.Should().HaveCount(2);
560+
result.Suppliers.Should().Contain(s => s.Name == "Lodash Team");
561+
result.Suppliers.Should().Contain(s => s.Name == "Contoso");
562+
}
431563
}

0 commit comments

Comments
 (0)